This is the formal analysis file for the “The Nature of Reasonableness” by by Kevin Tobia, Ivar R. Hannikainen, David Kamper, Guilherme Almeida, Piotr Bystranowski, Niek Strohmaier, Vilius Dranseika, Markus Kneer, Fernando Aguiar, Kristina Dolinina, Bartosz Janik, Eglė Lauraitytė, Alice Liefgreen, Maciej Próchnicki, Alejandro Rosas, Vivek Kumar Shukla, and Noel Struchiner (2025, Stanford Journal of International Law).
# Load Libraries
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ readr 2.1.5
## ✔ ggplot2 3.5.1 ✔ stringr 1.5.1
## ✔ lubridate 1.9.4 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(stats)
library(ggplot2)
library(ggthemes)
library(corrplot)
## corrplot 0.95 loaded
library(gridExtra)
##
## Attaching package: 'gridExtra'
##
## The following object is masked from 'package:dplyr':
##
## combine
library(kableExtra)
##
## Attaching package: 'kableExtra'
##
## The following object is masked from 'package:dplyr':
##
## group_rows
library(effects)
## Loading required package: carData
## lattice theme set by effectsTheme()
## See ?effectsTheme for details.
DATA_PATH <- "/Users/dgkamper/Library/CloudStorage/GoogleDrive-dgkamper@gmail.com/My Drive/DGK Lab/Collaborations/Language and Law Laboratory/Analysis/LegalResonablenessAnalysis/Data_all_numeric.csv"
# Read the data from specified path
Data_All <- read.csv(DATA_PATH, stringsAsFactors = FALSE)
# Verify data structure
glimpse(Data_All)
## Rows: 2,426
## Columns: 42
## $ q1 <dbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2…
## $ q2 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ q3 <dbl> 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8…
## $ q4 <dbl> 2500, 2500, 2500, 2500, 2500, 2500, 2500, 2500, 2500, 2…
## $ m <dbl> 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, …
## $ q6 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ q7 <dbl> 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,…
## $ q8 <dbl> 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, …
## $ q9 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ q10 <dbl> 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3…
## $ q11 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ q12 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ q13 <dbl> 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,…
## $ q14 <chr> "30", "30", "30", "30", "30", "30", "30", "30", "30", "…
## $ q15 <dbl> 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,…
## $ q16 <dbl> 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5…
## $ q17 <dbl> 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3…
## $ q18 <int> 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,…
## $ q19 <int> 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,…
## $ q20 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ q21 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ q22 <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ q23 <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ q24 <dbl> 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5…
## $ q25 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ q26 <dbl> 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3…
## $ q27 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ q28 <dbl> 5, 5, 10, 50, 20, 5, 5, 10, 25, 60, 100, 80, 5, 20, 10,…
## $ q29 <dbl> 100, 50, 2, 25, 100, 100, 5, 50, 100, 100, 100, 100, 15…
## $ q30 <dbl> 3, 4, 2, 1, 6, 1, 2, 12, 26, 5, 0, 1, 8, 2, 4, 3, 4, 2,…
## $ q31 <dbl> 20, 750, 2, 12, 25, 0, 3, 100, 10, 30, 250, 50, 50, 10,…
## $ q32 <dbl> 5, 1, 2, 48, 24, 24, 55, 24, 24, 24, 24, 48, 24, 48, 12…
## $ q33 <chr> "0", "10", "2", "25", "4", "8", "2", "4", "4", "3.2", "…
## $ q34 <dbl> 95, 1, 2, 15, 50, 0, 5, 0, 95, 85, 100, 99, 50, 30, 15,…
## $ age <int> 18, 47, 28, 30, NA, NA, 30, 50, 47, 33, 0, 0, 30, 43, 4…
## $ Gender <chr> "Male", "Male", "Male", "Male", "Female", "Female", "Ma…
## $ ID <chr> "usa1", "usa2", "usa3", "usa4", "usa5", "usa6", "usa7",…
## $ Country_name <chr> "USA", "USA", "USA", "USA", "USA", "USA", "USA", "USA",…
## $ Condition_name <chr> "Reasonable", "Reasonable", "Reasonable", "Reasonable",…
## $ ID.1 <chr> "usa1", "usa2", "usa3", "usa4", "usa5", "usa6", "usa7",…
## $ Country <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ Condition <int> 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3…
# Split data by condition
Data_All_Average <- Data_All %>% filter(Condition_name == "Average")
Data_All_Ideal <- Data_All %>% filter(Condition_name == "Ideal")
Data_All_Reasonable <- Data_All %>% filter(Condition_name == "Reasonable")
# Exclude participants whose answer to Q19 does not equal 15 [comprehension/attention question].
Data_All_Average_Pass1 <- Data_All_Average %>% filter(q19 == 15)
Data_All_Ideal_Pass1 <- Data_All_Ideal %>% filter(q19 == 15)
Data_All_Reasonable_Pass1 <- Data_All_Reasonable %>% filter(q19 == 15)
# Print participant counts
condition_counts <- data.frame(
Condition = c("Average", "Ideal", "Reasonable"),
Original_Count = c(nrow(Data_All_Average), nrow(Data_All_Ideal), nrow(Data_All_Reasonable)),
After_Attention_Check = c(nrow(Data_All_Average_Pass1), nrow(Data_All_Ideal_Pass1), nrow(Data_All_Reasonable_Pass1)),
Excluded_Count = c(nrow(Data_All_Average) - nrow(Data_All_Average_Pass1),
nrow(Data_All_Ideal) - nrow(Data_All_Ideal_Pass1),
nrow(Data_All_Reasonable) - nrow(Data_All_Reasonable_Pass1)),
Exclusion_Rate = c((nrow(Data_All_Average) - nrow(Data_All_Average_Pass1))/nrow(Data_All_Average),
(nrow(Data_All_Ideal) - nrow(Data_All_Ideal_Pass1))/nrow(Data_All_Ideal),
(nrow(Data_All_Reasonable) - nrow(Data_All_Reasonable_Pass1))/nrow(Data_All_Reasonable))
)
# Format the exclusion rate as percentage
condition_counts$Exclusion_Rate <- scales::percent(condition_counts$Exclusion_Rate)
# Display the participant counts table
kable(condition_counts, caption = "Participant Counts Before and After Attention Check") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
| Condition | Original_Count | After_Attention_Check | Excluded_Count | Exclusion_Rate |
|---|---|---|---|---|
| Average | 815 | 793 | 22 | 2.70% |
| Ideal | 825 | 800 | 25 | 3.03% |
| Reasonable | 786 | 758 | 28 | 3.56% |
# Define the question columns - ensuring they actually exist in the data
question_cols <- c("q1", "q2", "q3", "q4", "m", "q6", "q7", "q8", "q9", "q10",
"q11", "q12", "q13", "q14", "q15", "q16", "q17", "q18",
"q20", "q21", "q22", "q23", "q24", "q25", "q26", "q27",
"q28", "q29", "q30", "q31", "q32", "q33", "q34")
# Check if all question columns exist in the data
existing_cols <- question_cols[question_cols %in% colnames(Data_All_Average_Pass1)]
missing_cols <- question_cols[!question_cols %in% colnames(Data_All_Average_Pass1)]
print("Existing question columns:")
## [1] "Existing question columns:"
print(existing_cols)
## [1] "q1" "q2" "q3" "q4" "m" "q6" "q7" "q8" "q9" "q10" "q11" "q12"
## [13] "q13" "q14" "q15" "q16" "q17" "q18" "q20" "q21" "q22" "q23" "q24" "q25"
## [25] "q26" "q27" "q28" "q29" "q30" "q31" "q32" "q33" "q34"
print("Missing question columns:")
## [1] "Missing question columns:"
print(missing_cols)
## character(0)
# Continue with only the existing columns
question_cols <- existing_cols
# Convert columns to numeric - using a safer approach
Data_All_Average_Pass1 <- Data_All_Average_Pass1 %>%
mutate(across(all_of(question_cols), ~as.numeric(as.character(.))))
Data_All_Ideal_Pass1 <- Data_All_Ideal_Pass1 %>%
mutate(across(all_of(question_cols), ~as.numeric(as.character(.))))
## Warning: There were 2 warnings in `mutate()`.
## The first warning was:
## ℹ In argument: `across(all_of(question_cols), ~as.numeric(as.character(.)))`.
## Caused by warning:
## ! NAs introduced by coercion
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 1 remaining warning.
Data_All_Reasonable_Pass1 <- Data_All_Reasonable_Pass1 %>%
mutate(across(all_of(question_cols), ~as.numeric(as.character(.))))
# Verify the number of participants in each condition
cat("Ideal condition:", nrow(Data_All_Ideal_Pass1), "participants\n")
## Ideal condition: 800 participants
cat("Average condition:", nrow(Data_All_Average_Pass1), "participants\n")
## Average condition: 793 participants
cat("Reasonable condition:", nrow(Data_All_Reasonable_Pass1), "participants\n")
## Reasonable condition: 758 participants
For any response that is greater than 3 standard deviations from that question mean, exclude that response.
# Function to filter data based on 3 SD criterion
filter_data <- function(data, cols) {
# Pre-allocate a list to track excluded participants for each question
exclusions_by_question <- list()
# Original count
original_count <- nrow(data)
result <- data
for (col in cols) {
if (col %in% colnames(data)) {
# Calculate mean and standard deviation
avg <- mean(result[[col]], na.rm = TRUE)
s <- sd(result[[col]], na.rm = TRUE)
# Identify outliers
outlier_indices <- which(!is.na(result[[col]]) & abs(result[[col]] - avg) > 3 * s)
exclusions_by_question[[col]] <- length(outlier_indices)
# Filter out outliers
result <- result %>%
filter(is.na(!!sym(col)) | abs(!!sym(col) - avg) <= 3 * s)
}
}
# Final count
final_count <- nrow(result)
return(list(
filtered_data = result,
original_count = original_count,
final_count = final_count,
excluded_count = original_count - final_count,
exclusion_rate = (original_count - final_count) / original_count,
exclusions_by_question = exclusions_by_question
))
}
# Apply filtering and track exclusion counts
avg_filter_results <- filter_data(Data_All_Average_Pass1, question_cols)
ideal_filter_results <- filter_data(Data_All_Ideal_Pass1, question_cols)
reasonable_filter_results <- filter_data(Data_All_Reasonable_Pass1, question_cols)
# Display outlier exclusion summary
outlier_summary <- data.frame(
Condition = c("Average", "Ideal", "Reasonable"),
Original_Count = c(avg_filter_results$original_count,
ideal_filter_results$original_count,
reasonable_filter_results$original_count),
After_Outlier_Removal = c(avg_filter_results$final_count,
ideal_filter_results$final_count,
reasonable_filter_results$final_count),
Excluded_Count = c(avg_filter_results$excluded_count,
ideal_filter_results$excluded_count,
reasonable_filter_results$excluded_count),
Exclusion_Rate = c(avg_filter_results$exclusion_rate,
ideal_filter_results$exclusion_rate,
reasonable_filter_results$exclusion_rate)
)
# Format the exclusion rate as percentage
outlier_summary$Exclusion_Rate <- scales::percent(outlier_summary$Exclusion_Rate)
# Display the outlier exclusion summary table
kable(outlier_summary, caption = "Participant Counts Before and After Outlier Removal") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
| Condition | Original_Count | After_Outlier_Removal | Excluded_Count | Exclusion_Rate |
|---|---|---|---|---|
| Average | 793 | 609 | 184 | 23.20% |
| Ideal | 800 | 608 | 192 | 24.00% |
| Reasonable | 758 | 720 | 38 | 5.01% |
# Extract the filtered data
Data_All_Average_Filtered <- avg_filter_results$filtered_data
Data_All_Ideal_Filtered <- ideal_filter_results$filtered_data
Data_All_Reasonable_Filtered <- reasonable_filter_results$filtered_data
# Calculate means after excluding outliers
filtered_column_means_average <- sapply(Data_All_Average_Filtered[, question_cols], mean, na.rm = TRUE)
filtered_column_means_ideal <- sapply(Data_All_Ideal_Filtered[, question_cols], mean, na.rm = TRUE)
filtered_column_means_reasonable <- sapply(Data_All_Reasonable_Filtered[, question_cols], mean, na.rm = TRUE)
# Create a dataframe for the means
means_df <- data.frame(
Question = names(filtered_column_means_average),
Average = filtered_column_means_average,
Ideal = filtered_column_means_ideal,
Reasonable = filtered_column_means_reasonable
)
# Display the means for each question
kable(means_df, caption = "Mean Values for Each Question by Condition") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
| Question | Average | Ideal | Reasonable | |
|---|---|---|---|---|
| q1 | q1 | 4.717079 | 2.423138 | 2.000000 |
| q2 | q2 | 10.120066 | 3.508487 | 0.000000 |
| q3 | q3 | 4.425497 | 6.945092 | 8.000000 |
| q4 | q4 | 1932.197655 | 1647.105983 | 2500.000000 |
| m | m | 26.377667 | 41.317647 | 120.000000 |
| q6 | q6 | 27.802562 | 6.301167 | 0.000000 |
| q7 | q7 | 19.770132 | 6.589167 | 20.000000 |
| q8 | q8 | 8.612893 | 18.018303 | 100.000000 |
| q9 | q9 | 5.765728 | 11.359191 | 0.000000 |
| q10 | q10 | 5032.678623 | 8.432998 | 3.000000 |
| q11 | q11 | 5050.380471 | 7079.111932 | 0.000000 |
| q12 | q12 | 38.078602 | 8.936772 | 0.000000 |
| q13 | q13 | 59.506633 | 20.380235 | 25.000000 |
| q14 | q14 | 13.284132 | 3.547840 | 30.000000 |
| q15 | q15 | 11.635323 | 14.586236 | 25.000000 |
| q16 | q16 | 7.638550 | 7.958609 | 5.000000 |
| q17 | q17 | 208.070957 | 1.675530 | 3.000000 |
| q18 | q18 | 15.521523 | 9.793333 | 12.000000 |
| q20 | q20 | 26.018242 | 5.932231 | 0.000000 |
| q21 | q21 | 10.178713 | 8.181063 | 0.000000 |
| q22 | q22 | 13.893401 | 10.539496 | 1.000000 |
| q23 | q23 | 4.134615 | 4.432161 | 1.000000 |
| q24 | q24 | 46.867638 | 47.390252 | 5.000000 |
| q25 | q25 | 8980.698978 | 2641.582131 | 0.000000 |
| q26 | q26 | 12.584312 | 4.116415 | 3.000000 |
| q27 | q27 | 28.446488 | 8.343697 | 0.000000 |
| q28 | q28 | 15.162038 | 26.797274 | 22.767883 |
| q29 | q29 | 54.365320 | 72.998331 | 67.816395 |
| q30 | q30 | 32.157095 | 4.830236 | 7.575977 |
| q31 | q31 | 160.200000 | 58.560861 | 136.542155 |
| q32 | q32 | 44.029362 | 55.881313 | 44.316547 |
| q33 | q33 | 7.452260 | 3.735182 | 5.003173 |
| q34 | q34 | 53.374080 | 24.978115 | 41.688666 |
# Create a comparison table of means for plotting
means_long <- means_df %>%
pivot_longer(cols = c(Average, Ideal, Reasonable),
names_to = "Condition",
values_to = "Mean")
## Convert mean responses to a natural log scale.
log_filtered_column_means_average <- log(filtered_column_means_average)
log_filtered_column_means_ideal <- log(filtered_column_means_ideal)
log_filtered_column_means_reasonable <- log(filtered_column_means_reasonable)
# Check if we have any zeros before taking logs
print("Checking for zeros in means:")
## [1] "Checking for zeros in means:"
print("Zeros in average means:")
## [1] "Zeros in average means:"
print(names(filtered_column_means_average[filtered_column_means_average == 0]))
## character(0)
print("Zeros in ideal means:")
## [1] "Zeros in ideal means:"
print(names(filtered_column_means_ideal[filtered_column_means_ideal == 0]))
## character(0)
print("Zeros in reasonable means:")
## [1] "Zeros in reasonable means:"
print(names(filtered_column_means_reasonable[filtered_column_means_reasonable == 0]))
## [1] "q2" "q6" "q9" "q11" "q12" "q20" "q21" "q25" "q27"
safe_log <- function(x) {
# Replace zeros with a small constant
x[x == 0] <- 0.01
log(x)
}
# Take logs
log_filtered_column_means_average <- safe_log(filtered_column_means_average)
log_filtered_column_means_ideal <- safe_log(filtered_column_means_ideal)
log_filtered_column_means_reasonable <- safe_log(filtered_column_means_reasonable)
# Create a dataframe for the log means
log_means_df <- data.frame(
Question = names(log_filtered_column_means_average),
Average = log_filtered_column_means_average,
Ideal = log_filtered_column_means_ideal,
Reasonable = log_filtered_column_means_reasonable
)
# Prepare data for regression
dataforAICMeans <- data.frame(
Reasonable = log_filtered_column_means_reasonable,
Average = log_filtered_column_means_average,
Ideal = log_filtered_column_means_ideal
)
# Model I: Average predicts reasonable
Model1Means <- lm(Reasonable ~ Average, data = dataforAICMeans)
AIC1Means <- AIC(Model1Means)
r2_model1 <- summary(Model1Means)$r.squared
# Model II: Ideal predicts reasonable
Model2Means <- lm(Reasonable ~ Ideal, data = dataforAICMeans)
AIC2Means <- AIC(Model2Means)
r2_model2 <- summary(Model2Means)$r.squared
# Model III: Both average and ideal predict reasonable
Model3Means <- lm(Reasonable ~ Average + Ideal, data = dataforAICMeans)
AIC3Means <- AIC(Model3Means)
r2_model3 <- summary(Model3Means)$r.squared
# Create a regression summary dataframe
regression_summary <- data.frame(
Model = c("Average predicts Reasonable",
"Ideal predicts Reasonable",
"Average + Ideal predict Reasonable"),
AIC = c(AIC1Means, AIC2Means, AIC3Means),
R_squared = c(r2_model1, r2_model2, r2_model3)
)
# Display the regression summary table
kable(regression_summary, caption = "Regression Model Comparison") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
| Model | AIC | R_squared |
|---|---|---|
| Average predicts Reasonable | 184.0899 | 0.0010386 |
| Ideal predicts Reasonable | 183.9865 | 0.0041640 |
| Average + Ideal predict Reasonable | 185.5350 | 0.0176975 |
# Identify the best model based on AIC
best_model <- which.min(c(AIC1Means, AIC2Means, AIC3Means))
best_model_name <- c("Average → Reasonable", "Ideal → Reasonable", "Hybrid Model")[best_model]
# Create separate plots for Model 1 and Model 2
plot1 <- ggplot(dataforAICMeans, aes(x = Average, y = Reasonable)) +
geom_point(size = 3, alpha = 0.7) +
geom_smooth(method = "lm", se = TRUE, color = "blue") +
labs(title = "Model 1: Average Predicts Reasonable",
subtitle = paste("AIC =", round(AIC1Means, 2), ", R² =", round(r2_model1, 3)),
x = "Log Average Rating",
y = "Log Reasonable Rating") +
theme_minimal() +
theme(
plot.title = element_text(size = 14, face = "bold"),
plot.subtitle = element_text(size = 12)
)
plot2 <- ggplot(dataforAICMeans, aes(x = Ideal, y = Reasonable)) +
geom_point(size = 3, alpha = 0.7) +
geom_smooth(method = "lm", se = TRUE, color = "red") +
labs(title = "Model 2: Ideal Predicts Reasonable",
subtitle = paste("AIC =", round(AIC2Means, 2), ", R² =", round(r2_model2, 3)),
x = "Log Ideal Rating",
y = "Log Reasonable Rating") +
theme_minimal() +
theme(
plot.title = element_text(size = 14, face = "bold"),
plot.subtitle = element_text(size = 12)
)
# Create Model 3 visualization - effects of Average while holding Ideal constant
new_data <- expand.grid(
Average = seq(min(dataforAICMeans$Average), max(dataforAICMeans$Average), length.out = 100),
Ideal = median(dataforAICMeans$Ideal) # Hold Ideal constant at median
)
new_data$predicted_reasonable <- predict(Model3Means, newdata = new_data)
plot3 <- ggplot(dataforAICMeans, aes(x = Average, y = Reasonable)) +
geom_point(size = 3, alpha = 0.5) +
geom_line(data = new_data, aes(y = predicted_reasonable), color = "blue", size = 1) +
labs(title = "Model 3: Effect of Average",
subtitle = "Controlling for Ideal (held at median)",
x = "Log Average Rating",
y = "Log Reasonable Rating") +
theme_minimal() +
theme(
plot.title = element_text(size = 14, face = "bold"),
plot.subtitle = element_text(size = 12)
)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Create Model 3 visualization - effects of Ideal while holding Average constant
new_data2 <- expand.grid(
Average = median(dataforAICMeans$Average), # Hold Average constant at median
Ideal = seq(min(dataforAICMeans$Ideal), max(dataforAICMeans$Ideal), length.out = 100)
)
new_data2$predicted_reasonable <- predict(Model3Means, newdata = new_data2)
plot4 <- ggplot(dataforAICMeans, aes(x = Ideal, y = Reasonable)) +
geom_point(size = 3, alpha = 0.5) +
geom_line(data = new_data2, aes(y = predicted_reasonable), color = "red", size = 1) +
labs(title = "Model 3: Effect of Ideal",
subtitle = "Controlling for Average (held at median)",
x = "Log Ideal Rating",
y = "Log Reasonable Rating") +
theme_minimal() +
theme(
plot.title = element_text(size = 14, face = "bold"),
plot.subtitle = element_text(size = 12)
)
# Display plots individually
print(plot1)
## `geom_smooth()` using formula = 'y ~ x'
print(plot2)
## `geom_smooth()` using formula = 'y ~ x'
print(plot3)
print(plot4)
# Create an explanatory figure showing the relationship between means
means_summary <- data.frame(
Question = 1:length(filtered_column_means_average),
Average = filtered_column_means_average,
Ideal = filtered_column_means_ideal,
Reasonable = filtered_column_means_reasonable
) %>%
# Take a subset of questions for clearer visualization
slice(1:10)
# Convert to long format for plotting
means_summary_long <- means_summary %>%
pivot_longer(cols = c(Average, Ideal, Reasonable),
names_to = "Rating_Type",
values_to = "Value") %>%
mutate(Rating_Type = factor(Rating_Type, levels = c("Average", "Reasonable", "Ideal")))
# Create an explanatory plot
explanatory_plot <- ggplot(means_summary_long, aes(x = Rating_Type, y = Value, group = Question, color = factor(Question))) +
geom_line(size = 1) +
geom_point(size = 3) +
labs(title = "Relationship Between Average, Reasonable, and Ideal Ratings",
subtitle = "Example of first 10 questions showing how Reasonable often falls between Average and Ideal",
x = "Rating Type",
y = "Value",
color = "Question") +
theme_minimal() +
theme(
legend.position = "none",
plot.title = element_text(size = 14, face = "bold"),
plot.subtitle = element_text(size = 12)
)
# Display and save the explanatory plot
print(explanatory_plot)
ggsave("reasonable_between_average_ideal.png", explanatory_plot, width = 8, height = 6, dpi = 300)
# Create table of model coefficients
get_model_coefs <- function(model) {
coefs <- summary(model)$coefficients
data.frame(
Term = rownames(coefs),
Estimate = coefs[,1],
Std_Error = coefs[,2],
t_value = coefs[,3],
p_value = coefs[,4]
)
}
# Format p-values with asterisks for significance levels
format_pvalue <- function(p) {
if (p < 0.001) return("< 0.001 ***")
if (p < 0.01) return(paste0(round(p, 3), " **"))
if (p < 0.05) return(paste0(round(p, 3), " *"))
return(as.character(round(p, 3)))
}
# Get and format model coefficients
model3_coefs <- get_model_coefs(Model3Means)
model3_coefs$p_value <- sapply(model3_coefs$p_value, format_pvalue)
# Display the hybrid model coefficients
kable(model3_coefs, caption = "Hybrid Model Coefficients (Average + Ideal → Reasonable)") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
| Term | Estimate | Std_Error | t_value | p_value | |
|---|---|---|---|---|---|
| (Intercept) | (Intercept) | 0.8034340 | 1.3519113 | 0.5942949 | 0.557 |
| Average | Average | -0.3039662 | 0.4728053 | -0.6428994 | 0.525 |
| Ideal | Ideal | 0.3583289 | 0.5023663 | 0.7132821 | 0.481 |
# Store analysis results (simplified)
analysis_1_results <- list(
regression_models = list(
model1 = Model1Means,
model2 = Model2Means,
model3 = Model3Means
),
regression_summary = regression_summary,
aic_values = c(AIC1 = AIC1Means, AIC2 = AIC2Means, AIC3 = AIC3Means)
)
# Function to check if reasonable is on the "ideal side" of average.
# That is, if the reasonable rating is as close or closer to the ideal than to the average.
is_ideal_side <- function(average, ideal, reasonable) {
ifelse(abs(reasonable - ideal) <= abs(reasonable - average), 1, 0)
}
# Apply the function to raw means
ideal_side_vector <- mapply(is_ideal_side,
filtered_column_means_average,
filtered_column_means_ideal,
filtered_column_means_reasonable)
# Apply the function to log-transformed means
ideal_side_vector_log <- mapply(is_ideal_side,
log_filtered_column_means_average,
log_filtered_column_means_ideal,
log_filtered_column_means_reasonable)
# Count how many questions have reasonableness on the "ideal side" of average
count_ideal_side <- sum(ideal_side_vector)
count_ideal_side_log <- sum(ideal_side_vector_log)
# Conduct binomial tests to assess whether the proportion is greater than 50%
binomial_result_ideal <- binom.test(count_ideal_side, length(question_cols), p = 0.5,
alternative = "greater")
binomial_result_ideal_log <- binom.test(count_ideal_side_log, length(question_cols), p = 0.5,
alternative = "greater")
# Create a summary table for the binomial test results
ideal_side_summary <- data.frame(
Data_Type = c("Raw Means", "Log Means"),
On_Ideal_Side_Count = c(count_ideal_side, count_ideal_side_log),
Total_Questions = c(length(question_cols), length(question_cols)),
Proportion = c(count_ideal_side/length(question_cols),
count_ideal_side_log/length(question_cols)),
p_value = c(binomial_result_ideal$p.value, binomial_result_ideal_log$p.value)
)
# Format proportions and p-values for display
ideal_side_summary$Proportion <- scales::percent(ideal_side_summary$Proportion)
ideal_side_summary$p_value <- sapply(ideal_side_summary$p_value, format_pvalue)
# Display the binomial test results table
kable(ideal_side_summary,
caption = "Binomial Test Results: Is Reasonableness on the Ideal Side of Average?") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
| Data_Type | On_Ideal_Side_Count | Total_Questions | Proportion | p_value |
|---|---|---|---|---|
| Raw Means | 22 | 33 | 67% | 0.04 * |
| Log Means | 22 | 33 | 67% | 0.04 * |
# Function to check if reasonable is on the "average side" of ideal.
# That is, if the reasonable rating is as close or closer to the average than to the ideal.
is_average_side <- function(average, ideal, reasonable) {
ifelse(abs(reasonable - average) <= abs(reasonable - ideal), 1, 0)
}
# Apply the function to raw means
average_side_vector <- mapply(is_average_side,
filtered_column_means_average,
filtered_column_means_ideal,
filtered_column_means_reasonable)
# Apply the function to log-transformed means
average_side_vector_log <- mapply(is_average_side,
log_filtered_column_means_average,
log_filtered_column_means_ideal,
log_filtered_column_means_reasonable)
# Count how many questions have reasonableness on the "average side" of ideal
count_average_side <- sum(average_side_vector)
count_average_side_log <- sum(average_side_vector_log)
# Conduct binomial tests to assess whether the proportion is greater than 50%
binomial_result_avg_side <- binom.test(count_average_side, length(question_cols), p = 0.5,
alternative = "greater")
binomial_result_avg_side_log <- binom.test(count_average_side_log, length(question_cols), p = 0.5,
alternative = "greater")
# Create a summary table for the binomial test results
average_side_summary <- data.frame(
Data_Type = c("Raw Means", "Log Means"),
On_Average_Side_Count = c(count_average_side, count_average_side_log),
Total_Questions = c(length(question_cols), length(question_cols)),
Proportion = c(count_average_side/length(question_cols),
count_average_side_log/length(question_cols)),
p_value = c(binomial_result_avg_side$p.value, binomial_result_avg_side_log$p.value)
)
# Format proportions and p-values for display
average_side_summary$Proportion <- scales::percent(average_side_summary$Proportion)
average_side_summary$p_value <- sapply(average_side_summary$p_value, format_pvalue)
# Display the binomial test results table
kable(average_side_summary,
caption = "Binomial Test Results: Is Reasonableness on the Average Side of Ideal?") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
| Data_Type | On_Average_Side_Count | Total_Questions | Proportion | p_value |
|---|---|---|---|---|
| Raw Means | 11 | 33 | 33% | 0.982 |
| Log Means | 11 | 33 | 33% | 0.982 |
# Function to check if reasonable falls between average and ideal.
# This is a robust check: if (reasonable - average) and (reasonable - ideal) have opposite signs or are zero,
# then reasonable is between (or equal to one of) the anchors.
is_both_sides <- function(average, ideal, reasonable) {
ifelse((reasonable - average) * (reasonable - ideal) <= 0, 1, 0)
}
# Apply the function to raw means
both_sides_vector <- mapply(is_both_sides,
filtered_column_means_average,
filtered_column_means_ideal,
filtered_column_means_reasonable)
# Apply the function to log-transformed means
both_sides_vector_log <- mapply(is_both_sides,
log_filtered_column_means_average,
log_filtered_column_means_ideal,
log_filtered_column_means_reasonable)
# Count how many questions have reasonableness on both sides
count_both_sides <- sum(both_sides_vector)
count_both_sides_log <- sum(both_sides_vector_log)
# Conduct binomial tests with 1/3 as the chance rate (as specified in the analysis plan)
binomial_result_both_sides <- binom.test(count_both_sides, length(question_cols), p = 1/3,
alternative = "two.sided")
binomial_result_both_sides_log <- binom.test(count_both_sides_log, length(question_cols), p = 1/3,
alternative = "two.sided")
# Create a summary table for the binomial test results
both_sides_summary <- data.frame(
Data_Type = c("Raw Means", "Log Means"),
On_Both_Sides_Count = c(count_both_sides, count_both_sides_log),
Total_Questions = c(length(question_cols), length(question_cols)),
Proportion = c(count_both_sides/length(question_cols),
count_both_sides_log/length(question_cols)),
Expected_Chance = c(1/3, 1/3),
p_value = c(binomial_result_both_sides$p.value, binomial_result_both_sides_log$p.value)
)
# Format proportions and p-values for display
both_sides_summary$Proportion <- scales::percent(both_sides_summary$Proportion)
both_sides_summary$Expected_Chance <- scales::percent(both_sides_summary$Expected_Chance)
both_sides_summary$p_value <- sapply(both_sides_summary$p_value, format_pvalue)
# Display the binomial test results table
kable(both_sides_summary,
caption = "Binomial Test Results: Is Reasonableness Between Average and Ideal?") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
| Data_Type | On_Both_Sides_Count | Total_Questions | Proportion | Expected_Chance | p_value |
|---|---|---|---|---|---|
| Raw Means | 10 | 33 | 30% | 33% | 0.854 |
| Log Means | 10 | 33 | 30% | 33% | 0.854 |
# Create a table showing the results for each question
question_position_summary <- data.frame(
Question = names(filtered_column_means_average),
Average_Value = filtered_column_means_average,
Ideal_Value = filtered_column_means_ideal,
Reasonable_Value = filtered_column_means_reasonable,
On_Ideal_Side = ideal_side_vector,
On_Average_Side = average_side_vector,
Between_Average_And_Ideal = both_sides_vector
)
# Categorize each question's position
question_position_summary$Position <- "Other"
question_position_summary$Position[question_position_summary$Between_Average_And_Ideal == 1] <- "Between Average and Ideal"
question_position_summary$Position[question_position_summary$On_Ideal_Side == 1 &
question_position_summary$On_Average_Side == 0] <- "Beyond Ideal"
question_position_summary$Position[question_position_summary$On_Ideal_Side == 0 &
question_position_summary$On_Average_Side == 1] <- "Beyond Average"
# Display the question position summary
kable(question_position_summary[, c("Question", "Average_Value", "Ideal_Value", "Reasonable_Value", "Position")],
caption = "Position of Reasonable Rating Relative to Average and Ideal for Each Question") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
| Question | Average_Value | Ideal_Value | Reasonable_Value | Position | |
|---|---|---|---|---|---|
| q1 | q1 | 4.717079 | 2.423138 | 2.000000 | Beyond Ideal |
| q2 | q2 | 10.120066 | 3.508487 | 0.000000 | Beyond Ideal |
| q3 | q3 | 4.425497 | 6.945092 | 8.000000 | Beyond Ideal |
| q4 | q4 | 1932.197655 | 1647.105983 | 2500.000000 | Beyond Average |
| m | m | 26.377667 | 41.317647 | 120.000000 | Beyond Ideal |
| q6 | q6 | 27.802562 | 6.301167 | 0.000000 | Beyond Ideal |
| q7 | q7 | 19.770132 | 6.589167 | 20.000000 | Beyond Average |
| q8 | q8 | 8.612893 | 18.018303 | 100.000000 | Beyond Ideal |
| q9 | q9 | 5.765728 | 11.359191 | 0.000000 | Beyond Average |
| q10 | q10 | 5032.678623 | 8.432998 | 3.000000 | Beyond Ideal |
| q11 | q11 | 5050.380471 | 7079.111932 | 0.000000 | Beyond Average |
| q12 | q12 | 38.078602 | 8.936772 | 0.000000 | Beyond Ideal |
| q13 | q13 | 59.506633 | 20.380235 | 25.000000 | Beyond Ideal |
| q14 | q14 | 13.284132 | 3.547840 | 30.000000 | Beyond Average |
| q15 | q15 | 11.635323 | 14.586236 | 25.000000 | Beyond Ideal |
| q16 | q16 | 7.638550 | 7.958609 | 5.000000 | Beyond Average |
| q17 | q17 | 208.070957 | 1.675530 | 3.000000 | Beyond Ideal |
| q18 | q18 | 15.521523 | 9.793333 | 12.000000 | Beyond Ideal |
| q20 | q20 | 26.018242 | 5.932231 | 0.000000 | Beyond Ideal |
| q21 | q21 | 10.178713 | 8.181063 | 0.000000 | Beyond Ideal |
| q22 | q22 | 13.893401 | 10.539496 | 1.000000 | Beyond Ideal |
| q23 | q23 | 4.134615 | 4.432161 | 1.000000 | Beyond Average |
| q24 | q24 | 46.867638 | 47.390252 | 5.000000 | Beyond Average |
| q25 | q25 | 8980.698978 | 2641.582131 | 0.000000 | Beyond Ideal |
| q26 | q26 | 12.584312 | 4.116415 | 3.000000 | Beyond Ideal |
| q27 | q27 | 28.446488 | 8.343697 | 0.000000 | Beyond Ideal |
| q28 | q28 | 15.162038 | 26.797274 | 22.767883 | Beyond Ideal |
| q29 | q29 | 54.365320 | 72.998331 | 67.816395 | Beyond Ideal |
| q30 | q30 | 32.157095 | 4.830236 | 7.575977 | Beyond Ideal |
| q31 | q31 | 160.200000 | 58.560861 | 136.542155 | Beyond Average |
| q32 | q32 | 44.029362 | 55.881313 | 44.316547 | Beyond Average |
| q33 | q33 | 7.452260 | 3.735182 | 5.003173 | Beyond Ideal |
| q34 | q34 | 53.374080 | 24.978115 | 41.688666 | Beyond Average |
# Count the number of questions in each position category
position_counts <- table(question_position_summary$Position)
# Create a data frame for the position summary
position_summary <- data.frame(
Position = names(position_counts),
Count = as.numeric(position_counts)
)
# Calculate the percentages manually to ensure they're numeric
position_summary$Percentage <- (position_summary$Count / sum(position_summary$Count)) * 100
# Round the percentages to one decimal place
position_summary$Percentage <- round(position_summary$Percentage, 1)
# Display the position count summary
kable(position_summary,
caption = "Distribution of Position Categories") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
| Position | Count | Percentage |
|---|---|---|
| Beyond Average | 11 | 33.3 |
| Beyond Ideal | 22 | 66.7 |
# Create a visualization showing the distribution of positions
ggplot(position_summary, aes(x = Position, y = Count, fill = Position)) +
geom_bar(stat = "identity") +
geom_text(aes(label = paste0(Count, " (", Percentage, "%)")),
vjust = -0.5, size = 4) +
labs(title = "Distribution of Position Categories",
subtitle = "Where does 'Reasonable' fall relative to 'Average' and 'Ideal'?",
x = "Position Category",
y = "Number of Questions") +
theme_minimal() +
theme(
plot.title = element_text(size = 14, face = "bold"),
plot.subtitle = element_text(size = 12),
axis.title = element_text(size = 12),
axis.text = element_text(size = 10),
legend.position = "none"
)
# Save the position distribution plot
ggsave("position_distribution.png", width = 10, height = 6, dpi = 300)
# Create a triangle visualization to show the relationships between the ratings
# Prepare data for the triangle visualization
triangle_data <- question_position_summary
# Create a function to normalize values to 0-1 scale for better visualization
normalize <- function(x) {
(x - min(x, na.rm = TRUE)) / (max(x, na.rm = TRUE) - min(x, na.rm = TRUE))
}
# Create a scatterplot showing the relative positions
ggplot(triangle_data, aes(x = Average_Value, y = Ideal_Value)) +
# Add a diagonal reference line where average = ideal
geom_abline(intercept = 0, slope = 1, linetype = "dashed", alpha = 0.5) +
# Add points for each question's average and ideal values
geom_point(color = "gray60", size = 3, alpha = 0.6) +
# Add points for reasonable values, colored by position category
geom_point(aes(x = Average_Value, y = Reasonable_Value, color = Position), size = 4) +
# Connect reasonable to average with a line
geom_segment(aes(xend = Average_Value, yend = Reasonable_Value, color = Position),
linetype = "dotted", size = 0.5) +
# geom_text(aes(label = Question), hjust = -0.2, vjust = 0, size = 3) +
scale_color_brewer(palette = "Set1") +
labs(title = "Intermediacy Analysis: Position of Reasonable Relative to Average and Ideal",
subtitle = paste0("Questions with Reasonable between Average and Ideal: ",
count_both_sides, " of ", length(question_cols),
" (", scales::percent(count_both_sides/length(question_cols)), ")"),
x = "Average Rating",
y = "Rating Value",
color = "Position Category") +
theme_minimal() +
theme(
plot.title = element_text(size = 14, face = "bold"),
plot.subtitle = element_text(size = 12),
axis.title = element_text(size = 12),
axis.text = element_text(size = 10),
legend.position = "bottom"
)
# Save the intermediacy plot
ggsave("intermediacy_analysis.png", width = 10, height = 8, dpi = 300)
# Create a more detailed plot showing all three ratings for each question
# Convert to long format for plotting
ratings_long <- triangle_data %>%
select(Question, Average_Value, Ideal_Value, Reasonable_Value, Position) %>%
pivot_longer(cols = c(Average_Value, Ideal_Value, Reasonable_Value),
names_to = "Rating_Type",
values_to = "Value") %>%
mutate(Rating_Type = gsub("_Value", "", Rating_Type))
# Plot the ratings for each question, grouped and colored
ggplot(ratings_long, aes(x = Rating_Type, y = Value, group = Question, color = Position)) +
geom_line(alpha = 0.6) +
geom_point(size = 3) +
facet_wrap(~ Position, scales = "free_y") +
scale_color_brewer(palette = "Set1") +
labs(title = "Rating Patterns by Position Category",
subtitle = "How Average, Ideal, and Reasonable ratings relate within each position category",
x = "Rating Type",
y = "Value") +
theme_minimal() +
theme(
plot.title = element_text(size = 14, face = "bold"),
plot.subtitle = element_text(size = 12),
axis.title = element_text(size = 12),
axis.text = element_text(size = 10),
legend.position = "none"
)
# Save the ratings pattern plot
ggsave("rating_patterns_by_position.png", width = 12, height = 8, dpi = 300)
# Store all the results in a structured list
analysis_2_results <- list(
ideal_side = list(
vector = ideal_side_vector,
log_vector = ideal_side_vector_log,
count = count_ideal_side,
log_count = count_ideal_side_log,
binomial_test = binomial_result_ideal,
log_binomial_test = binomial_result_ideal_log,
summary = ideal_side_summary
),
average_side = list(
vector = average_side_vector,
log_vector = average_side_vector_log,
count = count_average_side,
log_count = count_average_side_log,
binomial_test = binomial_result_avg_side,
log_binomial_test = binomial_result_avg_side_log,
summary = average_side_summary
),
both_sides = list(
vector = both_sides_vector,
log_vector = both_sides_vector_log,
count = count_both_sides,
log_count = count_both_sides_log,
binomial_test = binomial_result_both_sides,
log_binomial_test = binomial_result_both_sides_log,
summary = both_sides_summary
),
question_position = question_position_summary,
position_summary = position_summary
)
Exclude participants whose answer to Q19 does not equal 15 [comprehension/attention question]. [THIS HAS BEEN COMPLETED IN ANALYSIS 1]
# Calculate medians directly from the attention-check filtered data
column_medians_average <- sapply(Data_All_Average_Pass1[, question_cols], median, na.rm = TRUE)
column_medians_ideal <- sapply(Data_All_Ideal_Pass1[, question_cols], median, na.rm = TRUE)
column_medians_reasonable <- sapply(Data_All_Reasonable_Pass1[, question_cols], median, na.rm = TRUE)
# Create a dataframe for the medians
medians_df <- data.frame(
Question = names(column_medians_average),
Average = column_medians_average,
Ideal = column_medians_ideal,
Reasonable = column_medians_reasonable
)
# Display the medians for each question
kable(medians_df, caption = "Median Values for Each Question by Condition") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
| Question | Average | Ideal | Reasonable | |
|---|---|---|---|---|
| q1 | q1 | 3.0 | 2.0 | 2 |
| q2 | q2 | 7.0 | 2.0 | 0 |
| q3 | q3 | 3.0 | 6.0 | 8 |
| q4 | q4 | 2000.0 | 2000.0 | 2500 |
| m | m | 20.0 | 30.0 | 120 |
| q6 | q6 | 10.0 | 1.0 | 0 |
| q7 | q7 | 15.0 | 5.0 | 20 |
| q8 | q8 | 5.0 | 12.0 | 100 |
| q9 | q9 | 5.0 | 3.0 | 0 |
| q10 | q10 | 5.0 | 0.0 | 3 |
| q11 | q11 | 500.0 | 0.0 | 0 |
| q12 | q12 | 30.0 | 1.0 | 0 |
| q13 | q13 | 40.0 | 10.0 | 25 |
| q14 | q14 | 10.0 | 2.0 | 30 |
| q15 | q15 | 9.0 | 10.0 | 25 |
| q16 | q16 | 5.0 | 5.0 | 5 |
| q17 | q17 | 3.0 | 0.0 | 3 |
| q18 | q18 | 15.0 | 2.0 | 12 |
| q20 | q20 | 20.0 | 0.0 | 0 |
| q21 | q21 | 8.0 | 5.0 | 0 |
| q22 | q22 | 10.0 | 7.0 | 1 |
| q23 | q23 | 2.0 | 3.0 | 1 |
| q24 | q24 | 16.0 | 24.0 | 5 |
| q25 | q25 | 2000.0 | 500.0 | 0 |
| q26 | q26 | 8.0 | 2.0 | 3 |
| q27 | q27 | 12.0 | 5.0 | 0 |
| q28 | q28 | 10.0 | 20.0 | 20 |
| q29 | q29 | 55.0 | 90.0 | 80 |
| q30 | q30 | 12.0 | 4.0 | 4 |
| q31 | q31 | 50.0 | 13.0 | 25 |
| q32 | q32 | 24.0 | 48.0 | 40 |
| q33 | q33 | 5.5 | 2.5 | 4 |
| q34 | q34 | 50.0 | 10.0 | 50 |
# Create a comparison table of medians for plotting
medians_long <- medians_df %>%
pivot_longer(cols = c(Average, Ideal, Reasonable),
names_to = "Condition",
values_to = "Median")
# Check if we have any zeros in medians before taking logs
print("Checking for zeros in medians:")
## [1] "Checking for zeros in medians:"
print("Zeros in average medians:")
## [1] "Zeros in average medians:"
print(names(column_medians_average[column_medians_average == 0]))
## character(0)
print("Zeros in ideal medians:")
## [1] "Zeros in ideal medians:"
print(names(column_medians_ideal[column_medians_ideal == 0]))
## [1] "q10" "q11" "q17" "q20"
print("Zeros in reasonable medians:")
## [1] "Zeros in reasonable medians:"
print(names(column_medians_reasonable[column_medians_reasonable == 0]))
## [1] "q2" "q6" "q9" "q11" "q12" "q20" "q21" "q25" "q27"
# Apply the same safe_log function used in Analysis 1
log_filtered_column_medians_average <- safe_log(column_medians_average)
log_filtered_column_medians_ideal <- safe_log(column_medians_ideal)
log_filtered_column_medians_reasonable <- safe_log(column_medians_reasonable)
# Create a dataframe for the log medians
log_medians_df <- data.frame(
Question = names(log_filtered_column_medians_average),
Average = log_filtered_column_medians_average,
Ideal = log_filtered_column_medians_ideal,
Reasonable = log_filtered_column_medians_reasonable
)
# Display the log medians
kable(log_medians_df, caption = "Log-Transformed Median Values for Each Question by Condition") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
| Question | Average | Ideal | Reasonable | |
|---|---|---|---|---|
| q1 | q1 | 1.0986123 | 0.6931472 | 0.6931472 |
| q2 | q2 | 1.9459101 | 0.6931472 | -4.6051702 |
| q3 | q3 | 1.0986123 | 1.7917595 | 2.0794415 |
| q4 | q4 | 7.6009025 | 7.6009025 | 7.8240460 |
| m | m | 2.9957323 | 3.4011974 | 4.7874917 |
| q6 | q6 | 2.3025851 | 0.0000000 | -4.6051702 |
| q7 | q7 | 2.7080502 | 1.6094379 | 2.9957323 |
| q8 | q8 | 1.6094379 | 2.4849066 | 4.6051702 |
| q9 | q9 | 1.6094379 | 1.0986123 | -4.6051702 |
| q10 | q10 | 1.6094379 | -4.6051702 | 1.0986123 |
| q11 | q11 | 6.2146081 | -4.6051702 | -4.6051702 |
| q12 | q12 | 3.4011974 | 0.0000000 | -4.6051702 |
| q13 | q13 | 3.6888795 | 2.3025851 | 3.2188758 |
| q14 | q14 | 2.3025851 | 0.6931472 | 3.4011974 |
| q15 | q15 | 2.1972246 | 2.3025851 | 3.2188758 |
| q16 | q16 | 1.6094379 | 1.6094379 | 1.6094379 |
| q17 | q17 | 1.0986123 | -4.6051702 | 1.0986123 |
| q18 | q18 | 2.7080502 | 0.6931472 | 2.4849066 |
| q20 | q20 | 2.9957323 | -4.6051702 | -4.6051702 |
| q21 | q21 | 2.0794415 | 1.6094379 | -4.6051702 |
| q22 | q22 | 2.3025851 | 1.9459101 | 0.0000000 |
| q23 | q23 | 0.6931472 | 1.0986123 | 0.0000000 |
| q24 | q24 | 2.7725887 | 3.1780538 | 1.6094379 |
| q25 | q25 | 7.6009025 | 6.2146081 | -4.6051702 |
| q26 | q26 | 2.0794415 | 0.6931472 | 1.0986123 |
| q27 | q27 | 2.4849066 | 1.6094379 | -4.6051702 |
| q28 | q28 | 2.3025851 | 2.9957323 | 2.9957323 |
| q29 | q29 | 4.0073332 | 4.4998097 | 4.3820266 |
| q30 | q30 | 2.4849066 | 1.3862944 | 1.3862944 |
| q31 | q31 | 3.9120230 | 2.5649494 | 3.2188758 |
| q32 | q32 | 3.1780538 | 3.8712010 | 3.6888795 |
| q33 | q33 | 1.7047481 | 0.9162907 | 1.3862944 |
| q34 | q34 | 3.9120230 | 2.3025851 | 3.9120230 |
# Create data frame for regression
dataforAICMedians <- data.frame(
Reasonable = log_filtered_column_medians_reasonable,
Average = log_filtered_column_medians_average,
Ideal = log_filtered_column_medians_ideal
)
# Model I: Median Average predicts Median Reasonable
Model1Medians <- lm(Reasonable ~ Average, data = dataforAICMedians)
AIC1Medians <- AIC(Model1Medians)
r2_model1_median <- summary(Model1Medians)$r.squared
# Model II: Median Ideal predicts Median Reasonable
Model2Medians <- lm(Reasonable ~ Ideal, data = dataforAICMedians)
AIC2Medians <- AIC(Model2Medians)
r2_model2_median <- summary(Model2Medians)$r.squared
# Model III: Both Median Average and Median Ideal predict Median Reasonable
Model3Medians <- lm(Reasonable ~ Average + Ideal, data = dataforAICMedians)
AIC3Medians <- AIC(Model3Medians)
r2_model3_median <- summary(Model3Medians)$r.squared
# Create a regression summary dataframe
regression_summary_medians <- data.frame(
Model = c("Average predicts Reasonable",
"Ideal predicts Reasonable",
"Average + Ideal predict Reasonable"),
AIC = c(AIC1Medians, AIC2Medians, AIC3Medians),
R_squared = c(r2_model1_median, r2_model2_median, r2_model3_median)
)
# Display the regression summary table
kable(regression_summary_medians, caption = "Regression Model Comparison (Median Values)") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
| Model | AIC | R_squared |
|---|---|---|
| Average predicts Reasonable | 183.1287 | 0.0014857 |
| Ideal predicts Reasonable | 176.1116 | 0.1927536 |
| Average + Ideal predict Reasonable | 177.2436 | 0.2137099 |
# Average Median vs Reasonable Median
plot1_median <- ggplot(dataforAICMedians, aes(x = Average, y = Reasonable)) +
geom_point(size = 3, alpha = 0.7) +
geom_smooth(method = "lm", se = TRUE, color = "blue") +
labs(title = "Model 1: Median Average Predicts Median Reasonable",
subtitle = paste("AIC =", round(AIC1Medians, 2), ", R² =", round(r2_model1_median, 3)),
x = "Log Median Average Rating",
y = "Log Median Reasonable Rating") +
theme_minimal() +
theme(
plot.title = element_text(size = 14, face = "bold"),
plot.subtitle = element_text(size = 12),
axis.title = element_text(size = 12),
axis.text = element_text(size = 10)
)
# Ideal Median vs Reasonable Median
plot2_median <- ggplot(dataforAICMedians, aes(x = Ideal, y = Reasonable)) +
geom_point(size = 3, alpha = 0.7) +
geom_smooth(method = "lm", se = TRUE, color = "red") +
labs(title = "Model 2: Median Ideal Predicts Median Reasonable",
subtitle = paste("AIC =", round(AIC2Medians, 2), ", R² =", round(r2_model2_median, 3)),
x = "Log Median Ideal Rating",
y = "Log Median Reasonable Rating") +
theme_minimal() +
theme(
plot.title = element_text(size = 14, face = "bold"),
plot.subtitle = element_text(size = 12),
axis.title = element_text(size = 12),
axis.text = element_text(size = 10)
)
# Create Model 3 visualizations for median-based analysis
# Model 3 visualization (effect of average, holding ideal constant)
new_data_median <- expand.grid(
Average = seq(min(dataforAICMedians$Average), max(dataforAICMedians$Average), length.out = 100),
Ideal = median(dataforAICMedians$Ideal) # Hold Ideal constant at median
)
new_data_median$predicted_reasonable <- predict(Model3Medians, newdata = new_data_median)
plot3_median <- ggplot(dataforAICMedians, aes(x = Average, y = Reasonable)) +
geom_point(size = 3, alpha = 0.5) +
geom_line(data = new_data_median, aes(y = predicted_reasonable), color = "blue", size = 1) +
labs(title = "Model 3: Effect of Median Average (Holding Median Ideal at Median)",
subtitle = paste("AIC =", round(AIC3Medians, 2), ", R² =", round(r2_model3_median, 3)),
x = "Log Median Average Rating",
y = "Log Median Reasonable Rating") +
theme_minimal() +
theme(
plot.title = element_text(size = 14, face = "bold"),
plot.subtitle = element_text(size = 12),
axis.title = element_text(size = 12),
axis.text = element_text(size = 10)
)
# Model 3 visualization (effect of ideal, holding average constant)
new_data2_median <- expand.grid(
Average = median(dataforAICMedians$Average), # Hold Average constant at median
Ideal = seq(min(dataforAICMedians$Ideal), max(dataforAICMedians$Ideal), length.out = 100)
)
new_data2_median$predicted_reasonable <- predict(Model3Medians, newdata = new_data2_median)
plot4_median <- ggplot(dataforAICMedians, aes(x = Ideal, y = Reasonable)) +
geom_point(size = 3, alpha = 0.5) +
geom_line(data = new_data2_median, aes(y = predicted_reasonable), color = "red", size = 1) +
labs(title = "Model 3: Effect of Median Ideal (Holding Median Average at Median)",
subtitle = paste("AIC =", round(AIC3Medians, 2), ", R² =", round(r2_model3_median, 3)),
x = "Log Median Ideal Rating",
y = "Log Median Reasonable Rating") +
theme_minimal() +
theme(
plot.title = element_text(size = 14, face = "bold"),
plot.subtitle = element_text(size = 12),
axis.title = element_text(size = 12),
axis.text = element_text(size = 10)
)
# Display plots
print(plot1_median)
## `geom_smooth()` using formula = 'y ~ x'
print(plot2_median)
## `geom_smooth()` using formula = 'y ~ x'
print(plot3_median)
print(plot4_median)
# Save plots
ggsave("model1_median_average_reasonable.png", plot1_median, width = 8, height = 6, dpi = 300)
## `geom_smooth()` using formula = 'y ~ x'
ggsave("model2_median_ideal_reasonable.png", plot2_median, width = 8, height = 6, dpi = 300)
## `geom_smooth()` using formula = 'y ~ x'
ggsave("model3_median_average_effect.png", plot3_median, width = 8, height = 6, dpi = 300)
ggsave("model3_median_ideal_effect.png", plot4_median, width = 8, height = 6, dpi = 300)
# Create a combined plot layout
grid_plots_median <- grid.arrange(plot1_median, plot2_median, plot3_median, plot4_median, ncol = 2)
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
ggsave("all_median_regression_plots.png", grid_plots_median, width = 14, height = 10, dpi = 300)
# Create a correlation matrix for median data
cor_matrix_median <- cor(dataforAICMedians, use = "complete.obs")
corrplot(cor_matrix_median, method = "color",
type = "upper",
addCoef.col = "black",
tl.col = "black",
tl.srt = 45,
diag = FALSE,
title = "Correlation Matrix (Median Values)",
mar = c(0,0,1,0))
# Create detailed coefficient tables
model1_coefs_median <- get_model_coefs(Model1Medians)
model2_coefs_median <- get_model_coefs(Model2Medians)
model3_coefs_median <- get_model_coefs(Model3Medians)
# Format p-values
model1_coefs_median$p_value <- sapply(model1_coefs_median$p_value, format_pvalue)
model2_coefs_median$p_value <- sapply(model2_coefs_median$p_value, format_pvalue)
model3_coefs_median$p_value <- sapply(model3_coefs_median$p_value, format_pvalue)
# Display coefficient tables
kable(model1_coefs_median, caption = "Model 1 Coefficients for Median Values (Average → Reasonable)") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
| Term | Estimate | Std_Error | t_value | p_value | |
|---|---|---|---|---|---|
| (Intercept) | (Intercept) | 0.4106407 | 1.2707513 | 0.3231480 | 0.749 |
| Average | Average | 0.0844553 | 0.3932404 | 0.2147677 | 0.831 |
kable(model2_coefs_median, caption = "Model 2 Coefficients for Median Values (Ideal → Reasonable)") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
| Term | Estimate | Std_Error | t_value | p_value | |
|---|---|---|---|---|---|
| (Intercept) | (Intercept) | -0.1073312 | 0.6357038 | -0.1688384 | 0.867 |
| Ideal | Ideal | 0.5729619 | 0.2105944 | 2.7206895 | 0.011 * |
kable(model3_coefs_median, caption = "Model 3 Coefficients for Median Values (Average + Ideal → Reasonable)") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
| Term | Estimate | Std_Error | t_value | p_value | |
|---|---|---|---|---|---|
| (Intercept) | (Intercept) | 0.7510611 | 1.1525172 | 0.6516702 | 0.52 |
| Average | Average | -0.3446637 | 0.3854503 | -0.8941845 | 0.378 |
| Ideal | Ideal | 0.6532777 | 0.2295785 | 2.8455532 | 0.008 ** |
# Print model summaries
cat("\nModel 1 Median Analysis (Average predicts Reasonable):\n")
##
## Model 1 Median Analysis (Average predicts Reasonable):
print(summary(Model1Medians))
##
## Call:
## lm(formula = Reasonable ~ Average, data = dataforAICMedians)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.6577 -5.1517 0.8317 2.4967 6.7715
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.41064 1.27075 0.323 0.749
## Average 0.08446 0.39324 0.215 0.831
##
## Residual standard error: 3.655 on 31 degrees of freedom
## Multiple R-squared: 0.001486, Adjusted R-squared: -0.03072
## F-statistic: 0.04613 on 1 and 31 DF, p-value: 0.8314
cat("\nModel 2 Median Analysis (Ideal predicts Reasonable):\n")
##
## Model 2 Median Analysis (Ideal predicts Reasonable):
print(summary(Model2Medians))
##
## Call:
## lm(formula = Reasonable ~ Ideal, data = dataforAICMedians)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.0586 -1.8593 0.9686 2.1809 3.8445
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.1073 0.6357 -0.169 0.8670
## Ideal 0.5730 0.2106 2.721 0.0106 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.286 on 31 degrees of freedom
## Multiple R-squared: 0.1928, Adjusted R-squared: 0.1667
## F-statistic: 7.402 on 1 and 31 DF, p-value: 0.01059
cat("\nModel 3 Median Analysis (Average and Ideal predict Reasonable):\n")
##
## Model 3 Median Analysis (Average and Ideal predict Reasonable):
print(summary(Model3Medians))
##
## Call:
## lm(formula = Reasonable ~ Average + Ideal, data = dataforAICMedians)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.7963 -1.2299 0.6114 2.2144 4.7272
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.7511 1.1525 0.652 0.51958
## Average -0.3447 0.3855 -0.894 0.37834
## Ideal 0.6533 0.2296 2.846 0.00792 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.297 on 30 degrees of freedom
## Multiple R-squared: 0.2137, Adjusted R-squared: 0.1613
## F-statistic: 4.077 on 2 and 30 DF, p-value: 0.02715
# Compare mean and median regressions
comparison_df <- data.frame(
Model = c("Average predicts Reasonable", "Ideal predicts Reasonable", "Average + Ideal predict Reasonable"),
Mean_Analysis_AIC = c(AIC1Means, AIC2Means, AIC3Means),
Mean_Analysis_R2 = c(r2_model1, r2_model2, r2_model3),
Median_Analysis_AIC = c(AIC1Medians, AIC2Medians, AIC3Medians),
Median_Analysis_R2 = c(r2_model1_median, r2_model2_median, r2_model3_median)
)
kable(comparison_df, caption = "Comparison of Mean-Based and Median-Based Analyses") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
| Model | Mean_Analysis_AIC | Mean_Analysis_R2 | Median_Analysis_AIC | Median_Analysis_R2 |
|---|---|---|---|---|
| Average predicts Reasonable | 184.0899 | 0.0010386 | 183.1287 | 0.0014857 |
| Ideal predicts Reasonable | 183.9865 | 0.0041640 | 176.1116 | 0.1927536 |
| Average + Ideal predict Reasonable | 185.5350 | 0.0176975 | 177.2436 | 0.2137099 |
# Store all the results in a structured list
analysis_3_results <- list(
medians_by_question = medians_df,
log_medians_by_question = log_medians_df,
regression_models = list(
model1 = Model1Medians,
model2 = Model2Medians,
model3 = Model3Medians
),
regression_summary = regression_summary_medians,
aic_values = c(AIC1 = AIC1Medians, AIC2 = AIC2Medians, AIC3 = AIC3Medians),
comparison_with_means = comparison_df
)
# Function to check if reasonable is on the "ideal side" of average
# Equal median ratings are treated as being on the predicted side
is_ideal_side_median <- function(average, ideal, reasonable) {
ifelse(abs(reasonable - ideal) <= abs(reasonable - average), 1, 0)
}
# Apply the function to the medians
ideal_side_vector_median <- mapply(is_ideal_side_median,
column_medians_average,
column_medians_ideal,
column_medians_reasonable)
# Count how many questions have reasonableness on the "ideal side" of average
count_ideal_side_median <- sum(ideal_side_vector_median)
# Conduct binomial test to assess whether the proportion is greater than 50%
binomial_result_ideal_median <- binom.test(count_ideal_side_median, length(question_cols), p = 0.5,
alternative = "greater")
# Create a summary table for the binomial test results
ideal_side_summary_median <- data.frame(
On_Ideal_Side_Count = count_ideal_side_median,
Total_Questions = length(question_cols),
Proportion = count_ideal_side_median/length(question_cols),
p_value = binomial_result_ideal_median$p.value
)
# Format proportion and p-value for display
ideal_side_summary_median$Proportion <- scales::percent(ideal_side_summary_median$Proportion)
ideal_side_summary_median$p_value <- format_pvalue(ideal_side_summary_median$p_value)
# Display the binomial test results table
kable(ideal_side_summary_median,
caption = "Binomial Test Results: Is Median Reasonableness on the Ideal Side of Average?") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
| On_Ideal_Side_Count | Total_Questions | Proportion | p_value |
|---|---|---|---|
| 25 | 33 | 76% | 0.002 ** |
# Function to check if reasonable is on the "average side" of ideal
is_average_side_median <- function(average, ideal, reasonable) {
ifelse(abs(reasonable - average) <= abs(reasonable - ideal), 1, 0)
}
# Apply the function to the medians
average_side_vector_median <- mapply(is_average_side_median,
column_medians_average,
column_medians_ideal,
column_medians_reasonable)
# Count how many questions have reasonableness on the "average side" of ideal
count_average_side_median <- sum(average_side_vector_median)
# Conduct binomial test to assess whether the proportion is greater than 50%
binomial_result_avg_side_median <- binom.test(count_average_side_median, length(question_cols), p = 0.5,
alternative = "greater")
# Create a summary table for the binomial test results
average_side_summary_median <- data.frame(
On_Average_Side_Count = count_average_side_median,
Total_Questions = length(question_cols),
Proportion = count_average_side_median/length(question_cols),
p_value = binomial_result_avg_side_median$p.value
)
# Format proportion and p-value for display
average_side_summary_median$Proportion <- scales::percent(average_side_summary_median$Proportion)
average_side_summary_median$p_value <- format_pvalue(average_side_summary_median$p_value)
# Display the binomial test results table
kable(average_side_summary_median,
caption = "Binomial Test Results: Is Median Reasonableness on the Average Side of Ideal?") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
| On_Average_Side_Count | Total_Questions | Proportion | p_value |
|---|---|---|---|
| 12 | 33 | 36% | 0.96 |
# Function to check if reasonable falls between average and ideal
# Using the same approach as Analysis 2
is_both_sides_median <- function(average, ideal, reasonable) {
ifelse((reasonable - average) * (reasonable - ideal) <= 0, 1, 0)
}
# Apply the function to the medians
both_sides_vector_median <- mapply(is_both_sides_median,
column_medians_average,
column_medians_ideal,
column_medians_reasonable)
# Count how many questions have reasonableness on both sides
count_both_sides_median <- sum(both_sides_vector_median)
# Conduct binomial test with 1/3 as the chance rate
binomial_result_both_sides_median <- binom.test(count_both_sides_median, length(question_cols), p = 1/3,
alternative = "two.sided")
# Create a summary table for the binomial test results
both_sides_summary_median <- data.frame(
On_Both_Sides_Count = count_both_sides_median,
Total_Questions = length(question_cols),
Proportion = count_both_sides_median/length(question_cols),
Expected_Chance = 1/3,
p_value = binomial_result_both_sides_median$p.value
)
# Format proportions and p-values for display
both_sides_summary_median$Proportion <- scales::percent(both_sides_summary_median$Proportion)
both_sides_summary_median$Expected_Chance <- scales::percent(both_sides_summary_median$Expected_Chance)
both_sides_summary_median$p_value <- format_pvalue(both_sides_summary_median$p_value)
# Display the binomial test results table
kable(both_sides_summary_median,
caption = "Binomial Test Results: Is Median Reasonableness Between Average and Ideal?") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
| On_Both_Sides_Count | Total_Questions | Proportion | Expected_Chance | p_value |
|---|---|---|---|---|
| 16 | 33 | 48% | 33% | 0.094 |
# Create a table showing the results for each question
question_position_summary_median <- data.frame(
Question = names(column_medians_average),
Average_Value = column_medians_average,
Ideal_Value = column_medians_ideal,
Reasonable_Value = column_medians_reasonable,
On_Ideal_Side = ideal_side_vector_median,
On_Average_Side = average_side_vector_median,
Between_Average_And_Ideal = both_sides_vector_median
)
# Categorize each question's position
question_position_summary_median$Position <- "Other"
question_position_summary_median$Position[question_position_summary_median$Between_Average_And_Ideal == 1] <- "Between Average and Ideal"
question_position_summary_median$Position[question_position_summary_median$On_Ideal_Side == 1 &
question_position_summary_median$On_Average_Side == 0] <- "Beyond Ideal"
question_position_summary_median$Position[question_position_summary_median$On_Ideal_Side == 0 &
question_position_summary_median$On_Average_Side == 1] <- "Beyond Average"
# Display the question position summary
kable(question_position_summary_median[, c("Question", "Average_Value", "Ideal_Value", "Reasonable_Value", "Position")],
caption = "Position of Median Reasonable Rating Relative to Average and Ideal for Each Question") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
| Question | Average_Value | Ideal_Value | Reasonable_Value | Position | |
|---|---|---|---|---|---|
| q1 | q1 | 3.0 | 2.0 | 2 | Beyond Ideal |
| q2 | q2 | 7.0 | 2.0 | 0 | Beyond Ideal |
| q3 | q3 | 3.0 | 6.0 | 8 | Beyond Ideal |
| q4 | q4 | 2000.0 | 2000.0 | 2500 | Other |
| m | m | 20.0 | 30.0 | 120 | Beyond Ideal |
| q6 | q6 | 10.0 | 1.0 | 0 | Beyond Ideal |
| q7 | q7 | 15.0 | 5.0 | 20 | Beyond Average |
| q8 | q8 | 5.0 | 12.0 | 100 | Beyond Ideal |
| q9 | q9 | 5.0 | 3.0 | 0 | Beyond Ideal |
| q10 | q10 | 5.0 | 0.0 | 3 | Beyond Average |
| q11 | q11 | 500.0 | 0.0 | 0 | Beyond Ideal |
| q12 | q12 | 30.0 | 1.0 | 0 | Beyond Ideal |
| q13 | q13 | 40.0 | 10.0 | 25 | Between Average and Ideal |
| q14 | q14 | 10.0 | 2.0 | 30 | Beyond Average |
| q15 | q15 | 9.0 | 10.0 | 25 | Beyond Ideal |
| q16 | q16 | 5.0 | 5.0 | 5 | Between Average and Ideal |
| q17 | q17 | 3.0 | 0.0 | 3 | Beyond Average |
| q18 | q18 | 15.0 | 2.0 | 12 | Beyond Average |
| q20 | q20 | 20.0 | 0.0 | 0 | Beyond Ideal |
| q21 | q21 | 8.0 | 5.0 | 0 | Beyond Ideal |
| q22 | q22 | 10.0 | 7.0 | 1 | Beyond Ideal |
| q23 | q23 | 2.0 | 3.0 | 1 | Beyond Average |
| q24 | q24 | 16.0 | 24.0 | 5 | Beyond Average |
| q25 | q25 | 2000.0 | 500.0 | 0 | Beyond Ideal |
| q26 | q26 | 8.0 | 2.0 | 3 | Beyond Ideal |
| q27 | q27 | 12.0 | 5.0 | 0 | Beyond Ideal |
| q28 | q28 | 10.0 | 20.0 | 20 | Beyond Ideal |
| q29 | q29 | 55.0 | 90.0 | 80 | Beyond Ideal |
| q30 | q30 | 12.0 | 4.0 | 4 | Beyond Ideal |
| q31 | q31 | 50.0 | 13.0 | 25 | Beyond Ideal |
| q32 | q32 | 24.0 | 48.0 | 40 | Beyond Ideal |
| q33 | q33 | 5.5 | 2.5 | 4 | Between Average and Ideal |
| q34 | q34 | 50.0 | 10.0 | 50 | Beyond Average |
# Count the number of questions in each position category
position_counts_median <- table(question_position_summary_median$Position)
# Create a data frame for the position summary
position_summary_median <- data.frame(
Position = names(position_counts_median),
Count = as.numeric(position_counts_median)
)
# Calculate the percentages manually to ensure they're numeric
position_summary_median$Percentage <- (position_summary_median$Count / sum(position_summary_median$Count)) * 100
# Round the percentages to one decimal place
position_summary_median$Percentage <- round(position_summary_median$Percentage, 1)
# Display the position count summary
kable(position_summary_median,
caption = "Distribution of Position Categories (Median Values)") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
| Position | Count | Percentage |
|---|---|---|
| Between Average and Ideal | 3 | 9.1 |
| Beyond Average | 8 | 24.2 |
| Beyond Ideal | 21 | 63.6 |
| Other | 1 | 3.0 |
# Create a visualization showing the distribution of positions
position_distribution_plot_median <- ggplot(position_summary_median, aes(x = Position, y = Count, fill = Position)) +
geom_bar(stat = "identity") +
geom_text(aes(label = paste0(Count, " (", Percentage, "%)")),
vjust = -0.5, size = 4) +
labs(title = "Distribution of Position Categories (Median Values)",
subtitle = "Where does 'Reasonable' fall relative to 'Average' and 'Ideal'?",
x = "Position Category",
y = "Number of Questions") +
theme_minimal() +
theme(
plot.title = element_text(size = 14, face = "bold"),
plot.subtitle = element_text(size = 12),
axis.title = element_text(size = 12),
axis.text = element_text(size = 10),
legend.position = "none"
)
# Display the plot
print(position_distribution_plot_median)
# Save the position distribution plot
ggsave("position_distribution_median.png", position_distribution_plot_median, width = 10, height = 6, dpi = 300)
# Create a scatterplot showing the relative positions
intermediacy_plot_median <- ggplot(question_position_summary_median, aes(x = Average_Value, y = Ideal_Value)) +
# Add a diagonal reference line where average = ideal
geom_abline(intercept = 0, slope = 1, linetype = "dashed", alpha = 0.5) +
# Add points for each question's average and ideal values
geom_point(color = "gray60", size = 3, alpha = 0.6) +
# Add points for reasonable values, colored by position category
geom_point(aes(x = Average_Value, y = Reasonable_Value, color = Position), size = 4) +
# Connect reasonable to average with a line
geom_segment(aes(xend = Average_Value, yend = Reasonable_Value, color = Position),
linetype = "dotted", size = 0.5) +
scale_color_brewer(palette = "Set1") +
labs(title = "Intermediacy Analysis: Position of Median Reasonable Relative to Average and Ideal",
subtitle = paste0("Questions with Reasonable between Average and Ideal: ",
count_both_sides_median, " of ", length(question_cols),
" (", scales::percent(count_both_sides_median/length(question_cols)), ")"),
x = "Median Average Rating",
y = "Rating Value",
color = "Position Category") +
theme_minimal() +
theme(
plot.title = element_text(size = 14, face = "bold"),
plot.subtitle = element_text(size = 12),
axis.title = element_text(size = 12),
axis.text = element_text(size = 10),
legend.position = "bottom"
)
# Display the plot
print(intermediacy_plot_median)
# Save the intermediacy plot
ggsave("intermediacy_analysis_median.png", intermediacy_plot_median, width = 10, height = 8, dpi = 300)
# Create a more detailed plot showing all three ratings for each question
# Convert to long format for plotting
ratings_long_median <- question_position_summary_median %>%
select(Question, Average_Value, Ideal_Value, Reasonable_Value, Position) %>%
pivot_longer(cols = c(Average_Value, Ideal_Value, Reasonable_Value),
names_to = "Rating_Type",
values_to = "Value") %>%
mutate(Rating_Type = gsub("_Value", "", Rating_Type))
# Plot the ratings for each question, grouped and colored
rating_patterns_plot_median <- ggplot(ratings_long_median, aes(x = Rating_Type, y = Value, group = Question, color = Position)) +
geom_line(alpha = 0.6) +
geom_point(size = 3) +
facet_wrap(~ Position, scales = "free_y") +
scale_color_brewer(palette = "Set1") +
labs(title = "Rating Patterns by Position Category (Median Values)",
subtitle = "How Average, Ideal, and Reasonable median ratings relate within each position category",
x = "Rating Type",
y = "Value") +
theme_minimal() +
theme(
plot.title = element_text(size = 14, face = "bold"),
plot.subtitle = element_text(size = 12),
axis.title = element_text(size = 12),
axis.text = element_text(size = 10),
legend.position = "none"
)
# Display the plot
print(rating_patterns_plot_median)
# Save the ratings pattern plot
ggsave("rating_patterns_by_position_median.png", rating_patterns_plot_median, width = 12, height = 8, dpi = 300)
# Compare the results between mean-based and median-based intermediacy analyses
comparison_intermediacy <- data.frame(
Analysis_Type = c("Mean-based Analysis", "Median-based Analysis"),
On_Ideal_Side_Percentage = c(
paste0(count_ideal_side, "/", length(question_cols), " (", scales::percent(count_ideal_side/length(question_cols)), ")"),
paste0(count_ideal_side_median, "/", length(question_cols), " (", scales::percent(count_ideal_side_median/length(question_cols)), ")")
),
On_Average_Side_Percentage = c(
paste0(count_average_side, "/", length(question_cols), " (", scales::percent(count_average_side/length(question_cols)), ")"),
paste0(count_average_side_median, "/", length(question_cols), " (", scales::percent(count_average_side_median/length(question_cols)), ")")
),
Between_Average_And_Ideal_Percentage = c(
paste0(count_both_sides, "/", length(question_cols), " (", scales::percent(count_both_sides/length(question_cols)), ")"),
paste0(count_both_sides_median, "/", length(question_cols), " (", scales::percent(count_both_sides_median/length(question_cols)), ")")
)
)
# Display the comparison table
kable(comparison_intermediacy, caption = "Comparison of Mean-based and Median-based Intermediacy Analyses") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
| Analysis_Type | On_Ideal_Side_Percentage | On_Average_Side_Percentage | Between_Average_And_Ideal_Percentage |
|---|---|---|---|
| Mean-based Analysis | 22/33 (67%) | 11/33 (33%) | 10/33 (30%) |
| Median-based Analysis | 25/33 (76%) | 12/33 (36%) | 16/33 (48%) |
# Store all the results in a structured list
analysis_4_results <- list(
ideal_side = list(
vector = ideal_side_vector_median,
count = count_ideal_side_median,
binomial_test = binomial_result_ideal_median,
summary = ideal_side_summary_median
),
average_side = list(
vector = average_side_vector_median,
count = count_average_side_median,
binomial_test = binomial_result_avg_side_median,
summary = average_side_summary_median
),
both_sides = list(
vector = both_sides_vector_median,
count = count_both_sides_median,
binomial_test = binomial_result_both_sides_median,
summary = both_sides_summary_median
),
question_position = question_position_summary_median,
position_summary = position_summary_median,
comparison_with_means = comparison_intermediacy
)
#### Get list of unique countries in the dataset (confirming ten)
countries <- unique(Data_All$Country_name)
print(paste("Number of countries in dataset:", length(countries)))
## [1] "Number of countries in dataset: 10"
print("Countries included in analysis:")
## [1] "Countries included in analysis:"
print(countries)
## [1] "USA" "Colombia" "Brazil" "Germany" "India"
## [6] "Lithuania" "Italy" "Poland" "Spain" "Netherlands"
# Create storage for country-specific results
country_results_mean <- list()
# Function to analyze data for a single country
analyze_country_mean <- function(country) {
cat("\n========== Analyzing Mean Data for:", country, "==========\n")
# Step 1: Split data by condition for this country
country_average <- Data_All_Average %>% filter(Country_name == country)
country_ideal <- Data_All_Ideal %>% filter(Country_name == country)
country_reasonable <- Data_All_Reasonable %>% filter(Country_name == country)
# Apply attention check filter (q19 == 15)
country_average_pass <- country_average %>% filter(q19 == 15)
country_ideal_pass <- country_ideal %>% filter(q19 == 15)
country_reasonable_pass <- country_reasonable %>% filter(q19 == 15)
# Print participant counts
cat("Participants after attention check filter:\n")
cat("Average condition:", nrow(country_average_pass), "\n")
cat("Ideal condition:", nrow(country_ideal_pass), "\n")
cat("Reasonable condition:", nrow(country_reasonable_pass), "\n")
# Convert columns to numeric
country_average_pass <- country_average_pass %>%
mutate(across(all_of(question_cols), ~as.numeric(as.character(.))))
country_ideal_pass <- country_ideal_pass %>%
mutate(across(all_of(question_cols), ~as.numeric(as.character(.))))
country_reasonable_pass <- country_reasonable_pass %>%
mutate(across(all_of(question_cols), ~as.numeric(as.character(.))))
# Step 2: Apply 3 SD filtering
# For average condition
country_avg_filter_results <- filter_data(country_average_pass, question_cols)
country_average_filtered <- country_avg_filter_results$filtered_data
# For ideal condition
country_ideal_filter_results <- filter_data(country_ideal_pass, question_cols)
country_ideal_filtered <- country_ideal_filter_results$filtered_data
# For reasonable condition
country_reasonable_filter_results <- filter_data(country_reasonable_pass, question_cols)
country_reasonable_filtered <- country_reasonable_filter_results$filtered_data
# Calculate means after filtering
country_means_average <- sapply(country_average_filtered[, question_cols], mean, na.rm = TRUE)
country_means_ideal <- sapply(country_ideal_filtered[, question_cols], mean, na.rm = TRUE)
country_means_reasonable <- sapply(country_reasonable_filtered[, question_cols], mean, na.rm = TRUE)
# Step 3: Convert to log scale
country_log_means_average <- safe_log(country_means_average)
country_log_means_ideal <- safe_log(country_means_ideal)
country_log_means_reasonable <- safe_log(country_means_reasonable)
# Step 4: Run regression models
country_dataforAIC <- data.frame(
Reasonable = country_log_means_reasonable,
Average = country_log_means_average,
Ideal = country_log_means_ideal
)
# Skip regression if we have too few data points
if(nrow(country_dataforAIC) < 5) {
cat("Too few data points for regression analysis in", country, "\n")
return(NULL)
}
# Model I: Average predicts reasonable
country_model1 <- lm(Reasonable ~ Average, data = country_dataforAIC)
country_aic1 <- AIC(country_model1)
country_r2_1 <- summary(country_model1)$r.squared
# Model II: Ideal predicts reasonable
country_model2 <- lm(Reasonable ~ Ideal, data = country_dataforAIC)
country_aic2 <- AIC(country_model2)
country_r2_2 <- summary(country_model2)$r.squared
# Model III: Both average and ideal predict reasonable
country_model3 <- lm(Reasonable ~ Average + Ideal, data = country_dataforAIC)
country_aic3 <- AIC(country_model3)
country_r2_3 <- summary(country_model3)$r.squared
# Create regression summary
country_reg_summary <- data.frame(
Model = c("Average predicts Reasonable",
"Ideal predicts Reasonable",
"Average + Ideal predict Reasonable"),
AIC = c(country_aic1, country_aic2, country_aic3),
R_squared = c(country_r2_1, country_r2_2, country_r2_3)
)
# Determine which model has the lowest AIC (best fit)
best_model_idx <- which.min(c(country_aic1, country_aic2, country_aic3))
best_model_name <- c("Average", "Ideal", "Hybrid")[best_model_idx]
cat("\nRegression Results for", country, ":\n")
print(country_reg_summary)
cat("\nBest model for", country, "is:", best_model_name, "\n")
# Return results for this country
return(list(
country = country,
sample_sizes = list(
original = c(nrow(country_average), nrow(country_ideal), nrow(country_reasonable)),
after_attention = c(nrow(country_average_pass), nrow(country_ideal_pass), nrow(country_reasonable_pass)),
after_filtering = c(nrow(country_average_filtered), nrow(country_ideal_filtered), nrow(country_reasonable_filtered))
),
means = list(
average = country_means_average,
ideal = country_means_ideal,
reasonable = country_means_reasonable
),
log_means = list(
average = country_log_means_average,
ideal = country_log_means_ideal,
reasonable = country_log_means_reasonable
),
regression_models = list(
model1 = country_model1,
model2 = country_model2,
model3 = country_model3
),
regression_summary = country_reg_summary,
aic_values = c(Model1 = country_aic1, Model2 = country_aic2, Model3 = country_aic3),
best_model = best_model_name
))
}
# Analyze data for each country
for(country in countries) {
country_results_mean[[country]] <- analyze_country_mean(country)
}
##
## ========== Analyzing Mean Data for: USA ==========
## Participants after attention check filter:
## Average condition: 73
## Ideal condition: 72
## Reasonable condition: 69
##
## Regression Results for USA :
## Model AIC R_squared
## 1 Average predicts Reasonable 184.5695 0.0009859579
## 2 Ideal predicts Reasonable 181.8299 0.0805742574
## 3 Average + Ideal predict Reasonable 179.3529 0.1972186736
##
## Best model for USA is: Hybrid
##
## ========== Analyzing Mean Data for: Colombia ==========
## Participants after attention check filter:
## Average condition: 89
## Ideal condition: 84
## Reasonable condition: 81
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `across(all_of(question_cols), ~as.numeric(as.character(.)))`.
## Caused by warning:
## ! NAs introduced by coercion
##
## Regression Results for Colombia :
## Model AIC R_squared
## 1 Average predicts Reasonable 184.2236 3.101717e-05
## 2 Ideal predicts Reasonable 183.1142 3.308842e-02
## 3 Average + Ideal predict Reasonable 180.3834 1.622269e-01
##
## Best model for Colombia is: Hybrid
##
## ========== Analyzing Mean Data for: Brazil ==========
## Participants after attention check filter:
## Average condition: 43
## Ideal condition: 56
## Reasonable condition: 52
##
## Regression Results for Brazil :
## Model AIC R_squared
## 1 Average predicts Reasonable 183.3230 0.004453007
## 2 Ideal predicts Reasonable 183.2444 0.006819475
## 3 Average + Ideal predict Reasonable 182.4260 0.088123747
##
## Best model for Brazil is: Hybrid
##
## ========== Analyzing Mean Data for: Germany ==========
## Participants after attention check filter:
## Average condition: 71
## Ideal condition: 72
## Reasonable condition: 72
##
## Regression Results for Germany :
## Model AIC R_squared
## 1 Average predicts Reasonable 183.8982 0.0001830072
## 2 Ideal predicts Reasonable 179.6743 0.1203071972
## 3 Average + Ideal predict Reasonable 180.0991 0.1613116299
##
## Best model for Germany is: Ideal
##
## ========== Analyzing Mean Data for: India ==========
## Participants after attention check filter:
## Average condition: 74
## Ideal condition: 71
## Reasonable condition: 56
##
## Regression Results for India :
## Model AIC R_squared
## 1 Average predicts Reasonable 181.6445 0.003266586
## 2 Ideal predicts Reasonable 181.6813 0.002156856
## 3 Average + Ideal predict Reasonable 183.6439 0.003286259
##
## Best model for India is: Average
##
## ========== Analyzing Mean Data for: Lithuania ==========
## Participants after attention check filter:
## Average condition: 76
## Ideal condition: 76
## Reasonable condition: 77
##
## Regression Results for Lithuania :
## Model AIC R_squared
## 1 Average predicts Reasonable 183.3373 0.007818933
## 2 Ideal predicts Reasonable 183.4802 0.003515395
## 3 Average + Ideal predict Reasonable 179.5624 0.167104625
##
## Best model for Lithuania is: Hybrid
##
## ========== Analyzing Mean Data for: Italy ==========
## Participants after attention check filter:
## Average condition: 73
## Ideal condition: 83
## Reasonable condition: 75
##
## Regression Results for Italy :
## Model AIC R_squared
## 1 Average predicts Reasonable 183.4638 0.00105871
## 2 Ideal predicts Reasonable 181.6477 0.05454873
## 3 Average + Ideal predict Reasonable 181.2000 0.12213542
##
## Best model for Italy is: Hybrid
##
## ========== Analyzing Mean Data for: Poland ==========
## Participants after attention check filter:
## Average condition: 90
## Ideal condition: 87
## Reasonable condition: 90
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `across(all_of(question_cols), ~as.numeric(as.character(.)))`.
## Caused by warning:
## ! NAs introduced by coercion
##
## Regression Results for Poland :
## Model AIC R_squared
## 1 Average predicts Reasonable 183.4983 0.0006096071
## 2 Ideal predicts Reasonable 181.3345 0.0640355998
## 3 Average + Ideal predict Reasonable 179.8445 0.1579659186
##
## Best model for Poland is: Hybrid
##
## ========== Analyzing Mean Data for: Spain ==========
## Participants after attention check filter:
## Average condition: 73
## Ideal condition: 69
## Reasonable condition: 72
##
## Regression Results for Spain :
## Model AIC R_squared
## 1 Average predicts Reasonable 182.8232 0.007816569
## 2 Ideal predicts Reasonable 177.1617 0.164234746
## 3 Average + Ideal predict Reasonable 176.0171 0.240200039
##
## Best model for Spain is: Hybrid
##
## ========== Analyzing Mean Data for: Netherlands ==========
## Participants after attention check filter:
## Average condition: 131
## Ideal condition: 130
## Reasonable condition: 114
##
## Regression Results for Netherlands :
## Model AIC R_squared
## 1 Average predicts Reasonable 183.7478 0.004381741
## 2 Ideal predicts Reasonable 182.7997 0.032579809
## 3 Average + Ideal predict Reasonable 183.9397 0.057464764
##
## Best model for Netherlands is: Ideal
# Extract AIC values and best models for all countries
country_aic_summary <- data.frame(
Country = character(),
AIC_Average = numeric(),
AIC_Ideal = numeric(),
AIC_Hybrid = numeric(),
R2_Average = numeric(),
R2_Ideal = numeric(),
R2_Hybrid = numeric(),
Best_Model = character(),
stringsAsFactors = FALSE
)
for(country in countries) {
if(!is.null(country_results_mean[[country]])) {
country_aic_summary <- rbind(country_aic_summary, data.frame(
Country = country,
AIC_Average = country_results_mean[[country]]$aic_values["Model1"],
AIC_Ideal = country_results_mean[[country]]$aic_values["Model2"],
AIC_Hybrid = country_results_mean[[country]]$aic_values["Model3"],
R2_Average = country_results_mean[[country]]$regression_summary$R_squared[1],
R2_Ideal = country_results_mean[[country]]$regression_summary$R_squared[2],
R2_Hybrid = country_results_mean[[country]]$regression_summary$R_squared[3],
Best_Model = country_results_mean[[country]]$best_model,
stringsAsFactors = FALSE
))
}
}
# Display the summary table
kable(country_aic_summary,
caption = "AIC Values and Best Models by Country (Mean Analysis)") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F) %>%
column_spec(8, background = ifelse(country_aic_summary$Best_Model == "Hybrid", "#e6f7ff",
ifelse(country_aic_summary$Best_Model == "Average", "#e6ffe6", "#ffe6e6")))
| Country | AIC_Average | AIC_Ideal | AIC_Hybrid | R2_Average | R2_Ideal | R2_Hybrid | Best_Model | |
|---|---|---|---|---|---|---|---|---|
| Model1 | USA | 184.5695 | 181.8299 | 179.3529 | 0.0009860 | 0.0805743 | 0.1972187 | Hybrid |
| Model11 | Colombia | 184.2236 | 183.1142 | 180.3834 | 0.0000310 | 0.0330884 | 0.1622269 | Hybrid |
| Model12 | Brazil | 183.3230 | 183.2444 | 182.4260 | 0.0044530 | 0.0068195 | 0.0881237 | Hybrid |
| Model13 | Germany | 183.8982 | 179.6743 | 180.0991 | 0.0001830 | 0.1203072 | 0.1613116 | Ideal |
| Model14 | India | 181.6445 | 181.6813 | 183.6439 | 0.0032666 | 0.0021569 | 0.0032863 | Average |
| Model15 | Lithuania | 183.3373 | 183.4802 | 179.5624 | 0.0078189 | 0.0035154 | 0.1671046 | Hybrid |
| Model16 | Italy | 183.4638 | 181.6477 | 181.2000 | 0.0010587 | 0.0545487 | 0.1221354 | Hybrid |
| Model17 | Poland | 183.4983 | 181.3345 | 179.8445 | 0.0006096 | 0.0640356 | 0.1579659 | Hybrid |
| Model18 | Spain | 182.8232 | 177.1617 | 176.0171 | 0.0078166 | 0.1642347 | 0.2402000 | Hybrid |
| Model19 | Netherlands | 183.7478 | 182.7997 | 183.9397 | 0.0043817 | 0.0325798 | 0.0574648 | Ideal |
# Count best models
best_model_counts <- table(country_aic_summary$Best_Model)
# Create a bar chart of best models by country
ggplot(country_aic_summary, aes(x = reorder(Country, -AIC_Hybrid), y = AIC_Hybrid, fill = Best_Model)) +
geom_bar(stat = "identity") +
geom_text(aes(label = Best_Model), vjust = -0.5, size = 3) +
labs(title = "Model Comparison Across Countries (Mean Analysis)",
subtitle = "Hybrid Model AIC Values with Best Model Indicated",
x = "Country",
y = "AIC Value (Hybrid Model)",
fill = "Best Model") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Create plots showing regression results for each country
country_plots <- list()
for(country in countries) {
if(!is.null(country_results_mean[[country]])) {
# Create a dataframe for plotting
plot_data <- data.frame(
Average = country_results_mean[[country]]$log_means$average,
Ideal = country_results_mean[[country]]$log_means$ideal,
Reasonable = country_results_mean[[country]]$log_means$reasonable
)
# Create plot for this country
country_plot <- ggplot(plot_data) +
geom_point(aes(x = Average, y = Reasonable), color = "blue", size = 3) +
geom_smooth(aes(x = Average, y = Reasonable), method = "lm", se = TRUE, color = "blue", linetype = "solid") +
geom_point(aes(x = Ideal, y = Reasonable), color = "red", size = 3) +
geom_smooth(aes(x = Ideal, y = Reasonable), method = "lm", se = TRUE, color = "red", linetype = "dashed") +
labs(title = paste0("Regression Results for ", country),
subtitle = paste0("Best model: ", country_results_mean[[country]]$best_model),
x = "Log Predictor Values",
y = "Log Reasonable Values") +
theme_minimal() +
annotate("text", x = min(plot_data$Average, plot_data$Ideal, na.rm = TRUE),
y = max(plot_data$Reasonable, na.rm = TRUE),
label = paste0("Average R² = ", round(country_results_mean[[country]]$regression_summary$R_squared[1], 3),
"\nIdeal R² = ", round(country_results_mean[[country]]$regression_summary$R_squared[2], 3),
"\nHybrid R² = ", round(country_results_mean[[country]]$regression_summary$R_squared[3], 3)),
hjust = 0, vjust = 1)
country_plots[[country]] <- country_plot
print(country_plot)
}
}
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
# Create plots comparing the regression coefficients across countries
# Extract coefficients for Average and Ideal from the hybrid model
coefficient_data <- data.frame(
Country = character(),
Average_Coef = numeric(),
Average_P = numeric(),
Ideal_Coef = numeric(),
Ideal_P = numeric(),
stringsAsFactors = FALSE
)
for(country in countries) {
if(!is.null(country_results_mean[[country]])) {
# Extract coefficients from hybrid model
model3_coefs <- summary(country_results_mean[[country]]$regression_models$model3)$coefficients
if(nrow(model3_coefs) >= 3) { # Ensure we have coefficients for both predictors
coefficient_data <- rbind(coefficient_data, data.frame(
Country = country,
Average_Coef = model3_coefs[2, 1], # Coefficient for Average
Average_P = model3_coefs[2, 4], # p-value for Average
Ideal_Coef = model3_coefs[3, 1], # Coefficient for Ideal
Ideal_P = model3_coefs[3, 4], # p-value for Ideal
stringsAsFactors = FALSE
))
}
}
}
# Format p-values
coefficient_data$Average_Sig <- ifelse(coefficient_data$Average_P < 0.05, "*", "")
coefficient_data$Ideal_Sig <- ifelse(coefficient_data$Ideal_P < 0.05, "*", "")
# Create long format for coefficient plotting
coef_long <- coefficient_data %>%
select(Country, Average_Coef, Ideal_Coef) %>%
pivot_longer(cols = c(Average_Coef, Ideal_Coef),
names_to = "Predictor",
values_to = "Coefficient") %>%
mutate(Predictor = gsub("_Coef", "", Predictor))
# Create coefficient plot
ggplot(coef_long, aes(x = Country, y = Coefficient, fill = Predictor)) +
geom_bar(stat = "identity", position = position_dodge()) +
geom_text(aes(label = ifelse(Predictor == "Average",
coefficient_data$Average_Sig[match(Country, coefficient_data$Country)],
coefficient_data$Ideal_Sig[match(Country, coefficient_data$Country)])),
position = position_dodge(width = 0.9), vjust = -0.5) +
labs(title = "Regression Coefficients by Country (Hybrid Model)",
subtitle = "* indicates p < 0.05",
x = "Country",
y = "Coefficient Value") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Save results
analysis_5_results <- list(
country_results = country_results_mean,
aic_summary = country_aic_summary,
coefficient_data = coefficient_data
)
# Create storage for country-specific intermediacy results
country_intermediacy_results <- list()
# Function to analyze intermediacy for a single country
analyze_country_intermediacy <- function(country) {
cat("\n========== Analyzing Intermediacy for:", country, "==========\n")
# We'll use the filtered mean data from Analysis 5
# If country data wasn't successfully processed in Analysis 5, return NULL
if(is.null(country_results_mean[[country]])) {
cat("No valid data for", country, "\n")
return(NULL)
}
# Extract the mean values
country_means_average <- country_results_mean[[country]]$means$average
country_means_ideal <- country_results_mean[[country]]$means$ideal
country_means_reasonable <- country_results_mean[[country]]$means$reasonable
# Step 3: Check if reasonable is on the "ideal side" of average
ideal_side_vector <- mapply(is_both_sides,
country_means_average,
country_means_ideal,
country_means_reasonable)
count_ideal_side <- sum(ideal_side_vector)
# Conduct binomial test
binomial_result_ideal <- binom.test(count_ideal_side, length(ideal_side_vector),
p = 0.5, alternative = "greater")
# Step 4: Check if reasonable is on the "average side" of ideal
average_side_vector <- mapply(is_average_side,
country_means_average,
country_means_ideal,
country_means_reasonable)
count_average_side <- sum(average_side_vector)
# Conduct binomial test
binomial_result_avg_side <- binom.test(count_average_side, length(average_side_vector),
p = 0.5, alternative = "greater")
# Step 5: Check if reasonable is both on the ideal side of average and average side of ideal
both_sides_vector <- mapply(is_both_sides,
country_means_average,
country_means_ideal,
country_means_reasonable)
count_both_sides <- sum(both_sides_vector)
# Conduct binomial test with 1/3 as the chance rate
binomial_result_both_sides <- binom.test(count_both_sides, length(both_sides_vector),
p = 1/3, alternative = "two.sided")
# Create a summary table
intermediacy_summary <- data.frame(
Test = c("On Ideal Side of Average", "On Average Side of Ideal", "Between Average and Ideal"),
Count = c(count_ideal_side, count_average_side, count_both_sides),
Total = rep(length(ideal_side_vector), 3),
Proportion = c(count_ideal_side/length(ideal_side_vector),
count_average_side/length(average_side_vector),
count_both_sides/length(both_sides_vector)),
p_value = c(binomial_result_ideal$p.value,
binomial_result_avg_side$p.value,
binomial_result_both_sides$p.value)
)
# Format proportions and p-values
intermediacy_summary$Proportion <- scales::percent(intermediacy_summary$Proportion)
intermediacy_summary$p_value <- sapply(intermediacy_summary$p_value, format_pvalue)
cat("\nIntermediary Analysis for", country, ":\n")
print(intermediacy_summary)
# Return results
return(list(
country = country,
ideal_side = list(
vector = ideal_side_vector,
count = count_ideal_side,
proportion = count_ideal_side/length(ideal_side_vector),
binomial_test = binomial_result_ideal
),
average_side = list(
vector = average_side_vector,
count = count_average_side,
proportion = count_average_side/length(average_side_vector),
binomial_test = binomial_result_avg_side
),
both_sides = list(
vector = both_sides_vector,
count = count_both_sides,
proportion = count_both_sides/length(both_sides_vector),
binomial_test = binomial_result_both_sides
),
summary = intermediacy_summary
))
}
# Analyze intermediacy for each country
for(country in countries) {
country_intermediacy_results[[country]] <- analyze_country_intermediacy(country)
}
##
## ========== Analyzing Intermediacy for: USA ==========
##
## Intermediary Analysis for USA :
## Test Count Total Proportion p_value
## 1 On Ideal Side of Average 10 33 30% 0.993
## 2 On Average Side of Ideal 10 33 30% 0.993
## 3 Between Average and Ideal 10 33 30% 0.854
##
## ========== Analyzing Intermediacy for: Colombia ==========
##
## Intermediary Analysis for Colombia :
## Test Count Total Proportion p_value
## 1 On Ideal Side of Average 7 33 21.2% 1
## 2 On Average Side of Ideal 10 33 30.3% 0.993
## 3 Between Average and Ideal 7 33 21.2% 0.195
##
## ========== Analyzing Intermediacy for: Brazil ==========
##
## Intermediary Analysis for Brazil :
## Test Count Total Proportion p_value
## 1 On Ideal Side of Average 6 33 18.2% 1
## 2 On Average Side of Ideal 7 33 21.2% 1
## 3 Between Average and Ideal 6 33 18.2% 0.067
##
## ========== Analyzing Intermediacy for: Germany ==========
##
## Intermediary Analysis for Germany :
## Test Count Total Proportion p_value
## 1 On Ideal Side of Average 12 33 36.4% 0.96
## 2 On Average Side of Ideal 11 33 33.3% 0.982
## 3 Between Average and Ideal 12 33 36.4% 0.714
##
## ========== Analyzing Intermediacy for: India ==========
##
## Intermediary Analysis for India :
## Test Count Total Proportion p_value
## 1 On Ideal Side of Average 4 33 12% 1
## 2 On Average Side of Ideal 12 33 36% 0.96
## 3 Between Average and Ideal 4 33 12% 0.009 **
##
## ========== Analyzing Intermediacy for: Lithuania ==========
##
## Intermediary Analysis for Lithuania :
## Test Count Total Proportion p_value
## 1 On Ideal Side of Average 9 33 27.3% 0.998
## 2 On Average Side of Ideal 12 33 36.4% 0.96
## 3 Between Average and Ideal 9 33 27.3% 0.58
##
## ========== Analyzing Intermediacy for: Italy ==========
##
## Intermediary Analysis for Italy :
## Test Count Total Proportion p_value
## 1 On Ideal Side of Average 15 33 45% 0.757
## 2 On Average Side of Ideal 6 33 18% 1
## 3 Between Average and Ideal 15 33 45% 0.143
##
## ========== Analyzing Intermediacy for: Poland ==========
##
## Intermediary Analysis for Poland :
## Test Count Total Proportion p_value
## 1 On Ideal Side of Average 13 33 39.4% 0.919
## 2 On Average Side of Ideal 10 33 30.3% 0.993
## 3 Between Average and Ideal 13 33 39.4% 0.464
##
## ========== Analyzing Intermediacy for: Spain ==========
##
## Intermediary Analysis for Spain :
## Test Count Total Proportion p_value
## 1 On Ideal Side of Average 11 33 33.3% 0.982
## 2 On Average Side of Ideal 9 33 27.3% 0.998
## 3 Between Average and Ideal 11 33 33.3% 1
##
## ========== Analyzing Intermediacy for: Netherlands ==========
##
## Intermediary Analysis for Netherlands :
## Test Count Total Proportion p_value
## 1 On Ideal Side of Average 11 33 33.3% 0.982
## 2 On Average Side of Ideal 9 33 27.3% 0.998
## 3 Between Average and Ideal 11 33 33.3% 1
# Create summary table for intermediacy across countries
intermediacy_by_country <- data.frame(
Country = character(),
Ideal_Side_Proportion = numeric(),
Ideal_Side_P = numeric(),
Average_Side_Proportion = numeric(),
Average_Side_P = numeric(),
Between_Proportion = numeric(),
Between_P = numeric(),
stringsAsFactors = FALSE
)
for(country in countries) {
if(!is.null(country_intermediacy_results[[country]])) {
intermediacy_by_country <- rbind(intermediacy_by_country, data.frame(
Country = country,
Ideal_Side_Proportion = country_intermediacy_results[[country]]$ideal_side$proportion,
Ideal_Side_P = country_intermediacy_results[[country]]$ideal_side$binomial_test$p.value,
Average_Side_Proportion = country_intermediacy_results[[country]]$average_side$proportion,
Average_Side_P = country_intermediacy_results[[country]]$average_side$binomial_test$p.value,
Between_Proportion = country_intermediacy_results[[country]]$both_sides$proportion,
Between_P = country_intermediacy_results[[country]]$both_sides$binomial_test$p.value,
stringsAsFactors = FALSE
))
}
}
# Format the proportions as percentages
intermediacy_by_country$Ideal_Side_Proportion <- scales::percent(intermediacy_by_country$Ideal_Side_Proportion)
intermediacy_by_country$Average_Side_Proportion <- scales::percent(intermediacy_by_country$Average_Side_Proportion)
intermediacy_by_country$Between_Proportion <- scales::percent(intermediacy_by_country$Between_Proportion)
# Add significance indicators
intermediacy_by_country$Ideal_Side_Sig <- ifelse(intermediacy_by_country$Ideal_Side_P < 0.05, "*", "")
intermediacy_by_country$Average_Side_Sig <- ifelse(intermediacy_by_country$Average_Side_P < 0.05, "*", "")
intermediacy_by_country$Between_Sig <- ifelse(intermediacy_by_country$Between_P < 0.05, "*", "")
# Create display table
display_intermediacy <- intermediacy_by_country %>%
select(Country,
Ideal_Side = Ideal_Side_Proportion,
Ideal_Sig = Ideal_Side_Sig,
Average_Side = Average_Side_Proportion,
Average_Sig = Average_Side_Sig,
Between = Between_Proportion,
Between_Sig = Between_Sig)
# Display the table
kable(display_intermediacy,
caption = "Intermediacy Analysis by Country (Mean Values)") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F) %>%
add_header_above(c(" " = 1, "Ideal Side of Average" = 2, "Average Side of Ideal" = 2, "Between Average and Ideal" = 2))
| Country | Ideal_Side | Ideal_Sig | Average_Side | Average_Sig | Between | Between_Sig |
|---|---|---|---|---|---|---|
| USA | 30.3% | 30.3% | 30.3% | |||
| Colombia | 21.2% | 30.3% | 21.2% | |||
| Brazil | 18.2% | 21.2% | 18.2% | |||
| Germany | 36.4% | 33.3% | 36.4% | |||
| India | 12.1% | 36.4% | 12.1% |
|
||
| Lithuania | 27.3% | 36.4% | 27.3% | |||
| Italy | 45.5% | 18.2% | 45.5% | |||
| Poland | 39.4% | 30.3% | 39.4% | |||
| Spain | 33.3% | 27.3% | 33.3% | |||
| Netherlands | 33.3% | 27.3% | 33.3% |
# Create a visualization of the "Between" proportions across countries
between_plot <- ggplot(intermediacy_by_country, aes(x = reorder(Country, -as.numeric(gsub("%", "", Between_Proportion))/100),
y = as.numeric(gsub("%", "", Between_Proportion))/100)) +
geom_bar(stat = "identity", fill = "steelblue") +
geom_text(aes(label = paste0(Between_Proportion, Between_Sig)), vjust = -0.5) +
geom_hline(yintercept = 1/3, linetype = "dashed", color = "red") +
labs(title = "Proportion of Questions where Reasonable is Between Average and Ideal",
subtitle = "By Country (Mean Analysis), Red line = Chance level (1/3), * = p < 0.05",
x = "Country",
y = "Proportion") +
scale_y_continuous(labels = scales::percent) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
print(between_plot)
ggsave("between_proportions_by_country.png", between_plot, width = 10, height = 6, dpi = 300)
# Save the results
analysis_6_results <- list(
country_results = country_intermediacy_results,
summary = intermediacy_by_country
)
# Create storage for country-specific median results
country_results_median <- list()
# Function to analyze median data for a single country
analyze_country_median <- function(country) {
cat("\n========== Analyzing Median Data for:", country, "==========\n")
# Step 1: Split data by condition for this country
country_average <- Data_All_Average %>% filter(Country_name == country)
country_ideal <- Data_All_Ideal %>% filter(Country_name == country)
country_reasonable <- Data_All_Reasonable %>% filter(Country_name == country)
# Apply attention check filter (q19 == 15)
country_average_pass <- country_average %>% filter(q19 == 15)
country_ideal_pass <- country_ideal %>% filter(q19 == 15)
country_reasonable_pass <- country_reasonable %>% filter(q19 == 15)
# Print participant counts
cat("Participants after attention check filter:\n")
cat("Average condition:", nrow(country_average_pass), "\n")
cat("Ideal condition:", nrow(country_ideal_pass), "\n")
cat("Reasonable condition:", nrow(country_reasonable_pass), "\n")
# Convert columns to numeric
country_average_pass <- country_average_pass %>%
mutate(across(all_of(question_cols), ~as.numeric(as.character(.))))
country_ideal_pass <- country_ideal_pass %>%
mutate(across(all_of(question_cols), ~as.numeric(as.character(.))))
country_reasonable_pass <- country_reasonable_pass %>%
mutate(across(all_of(question_cols), ~as.numeric(as.character(.))))
# Step 2: Calculate medians (no 3 SD filtering for median analysis)
country_medians_average <- sapply(country_average_pass[, question_cols], median, na.rm = TRUE)
country_medians_ideal <- sapply(country_ideal_pass[, question_cols], median, na.rm = TRUE)
country_medians_reasonable <- sapply(country_reasonable_pass[, question_cols], median, na.rm = TRUE)
# Step 3: Convert to log scale
country_log_medians_average <- safe_log(country_medians_average)
country_log_medians_ideal <- safe_log(country_medians_ideal)
country_log_medians_reasonable <- safe_log(country_medians_reasonable)
# Step 4: Run regression models
country_dataforAIC_median <- data.frame(
Reasonable = country_log_medians_reasonable,
Average = country_log_medians_average,
Ideal = country_log_medians_ideal
)
# Skip regression if we have too few data points
if(nrow(country_dataforAIC_median) < 5) {
cat("Too few data points for regression analysis in", country, "\n")
return(NULL)
}
# Model I: Average predicts reasonable
country_model1_median <- lm(Reasonable ~ Average, data = country_dataforAIC_median)
country_aic1_median <- AIC(country_model1_median)
country_r2_1_median <- summary(country_model1_median)$r.squared
# Model II: Ideal predicts reasonable
country_model2_median <- lm(Reasonable ~ Ideal, data = country_dataforAIC_median)
country_aic2_median <- AIC(country_model2_median)
country_r2_2_median <- summary(country_model2_median)$r.squared
# Model III: Both average and ideal predict reasonable
country_model3_median <- lm(Reasonable ~ Average + Ideal, data = country_dataforAIC_median)
country_aic3_median <- AIC(country_model3_median)
country_r2_3_median <- summary(country_model3_median)$r.squared
# Create regression summary
country_reg_summary_median <- data.frame(
Model = c("Average predicts Reasonable",
"Ideal predicts Reasonable",
"Average + Ideal predict Reasonable"),
AIC = c(country_aic1_median, country_aic2_median, country_aic3_median),
R_squared = c(country_r2_1_median, country_r2_2_median, country_r2_3_median)
)
# Determine which model has the lowest AIC (best fit)
best_model_idx <- which.min(c(country_aic1_median, country_aic2_median, country_aic3_median))
best_model_name <- c("Average", "Ideal", "Hybrid")[best_model_idx]
cat("\nRegression Results for", country, "(Median Analysis):\n")
print(country_reg_summary_median)
cat("\nBest model for", country, "is:", best_model_name, "\n")
# Return results for this country
return(list(
country = country,
sample_sizes = list(
after_attention = c(nrow(country_average_pass), nrow(country_ideal_pass), nrow(country_reasonable_pass))
),
medians = list(
average = country_medians_average,
ideal = country_medians_ideal,
reasonable = country_medians_reasonable
),
log_medians = list(
average = country_log_medians_average,
ideal = country_log_medians_ideal,
reasonable = country_log_medians_reasonable
),
regression_models = list(
model1 = country_model1_median,
model2 = country_model2_median,
model3 = country_model3_median
),
regression_summary = country_reg_summary_median,
aic_values = c(Model1 = country_aic1_median, Model2 = country_aic2_median, Model3 = country_aic3_median),
best_model = best_model_name
))
}
# Analyze median data for each country
for(country in countries) {
country_results_median[[country]] <- analyze_country_median(country)
}
##
## ========== Analyzing Median Data for: USA ==========
## Participants after attention check filter:
## Average condition: 73
## Ideal condition: 72
## Reasonable condition: 69
##
## Regression Results for USA (Median Analysis):
## Model AIC R_squared
## 1 Average predicts Reasonable 182.8055 0.003201958
## 2 Ideal predicts Reasonable 171.6330 0.289488677
## 3 Average + Ideal predict Reasonable 173.0812 0.301269643
##
## Best model for USA is: Ideal
##
## ========== Analyzing Median Data for: Colombia ==========
## Participants after attention check filter:
## Average condition: 89
## Ideal condition: 84
## Reasonable condition: 81
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `across(all_of(question_cols), ~as.numeric(as.character(.)))`.
## Caused by warning:
## ! NAs introduced by coercion
##
## Regression Results for Colombia (Median Analysis):
## Model AIC R_squared
## 1 Average predicts Reasonable 182.6409 0.001256565
## 2 Ideal predicts Reasonable 179.8702 0.081686636
## 3 Average + Ideal predict Reasonable 177.8151 0.187874069
##
## Best model for Colombia is: Hybrid
##
## ========== Analyzing Median Data for: Brazil ==========
## Participants after attention check filter:
## Average condition: 43
## Ideal condition: 56
## Reasonable condition: 52
##
## Regression Results for Brazil (Median Analysis):
## Model AIC R_squared
## 1 Average predicts Reasonable 182.9969 0.004584655
## 2 Ideal predicts Reasonable 177.8130 0.149289295
## 3 Average + Ideal predict Reasonable 178.2859 0.187758757
##
## Best model for Brazil is: Ideal
##
## ========== Analyzing Median Data for: Germany ==========
## Participants after attention check filter:
## Average condition: 71
## Ideal condition: 72
## Reasonable condition: 72
##
## Regression Results for Germany (Median Analysis):
## Model AIC R_squared
## 1 Average predicts Reasonable 183.3067 0.0005518197
## 2 Ideal predicts Reasonable 174.6719 0.2306542899
## 3 Average + Ideal predict Reasonable 175.6038 0.2551562716
##
## Best model for Germany is: Ideal
##
## ========== Analyzing Median Data for: India ==========
## Participants after attention check filter:
## Average condition: 74
## Ideal condition: 71
## Reasonable condition: 56
##
## Regression Results for India (Median Analysis):
## Model AIC R_squared
## 1 Average predicts Reasonable 179.6270 0.02412332
## 2 Ideal predicts Reasonable 179.2726 0.03454875
## 3 Average + Ideal predict Reasonable 181.1041 0.03946585
##
## Best model for India is: Ideal
##
## ========== Analyzing Median Data for: Lithuania ==========
## Participants after attention check filter:
## Average condition: 76
## Ideal condition: 76
## Reasonable condition: 77
##
## Regression Results for Lithuania (Median Analysis):
## Model AIC R_squared
## 1 Average predicts Reasonable 182.6574 0.002627272
## 2 Ideal predicts Reasonable 178.1545 0.129842497
## 3 Average + Ideal predict Reasonable 178.0779 0.182913172
##
## Best model for Lithuania is: Hybrid
##
## ========== Analyzing Median Data for: Italy ==========
## Participants after attention check filter:
## Average condition: 73
## Ideal condition: 83
## Reasonable condition: 75
##
## Regression Results for Italy (Median Analysis):
## Model AIC R_squared
## 1 Average predicts Reasonable 183.3033 0.0001271376
## 2 Ideal predicts Reasonable 178.9606 0.1234184008
## 3 Average + Ideal predict Reasonable 180.7836 0.1281056056
##
## Best model for Italy is: Ideal
##
## ========== Analyzing Median Data for: Poland ==========
## Participants after attention check filter:
## Average condition: 90
## Ideal condition: 87
## Reasonable condition: 90
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `across(all_of(question_cols), ~as.numeric(as.character(.)))`.
## Caused by warning:
## ! NAs introduced by coercion
##
## Regression Results for Poland (Median Analysis):
## Model AIC R_squared
## 1 Average predicts Reasonable 183.5047 8.151568e-05
## 2 Ideal predicts Reasonable 179.0232 1.270572e-01
## 3 Average + Ideal predict Reasonable 180.5577 1.392848e-01
##
## Best model for Poland is: Ideal
##
## ========== Analyzing Median Data for: Spain ==========
## Participants after attention check filter:
## Average condition: 73
## Ideal condition: 69
## Reasonable condition: 72
##
## Regression Results for Spain (Median Analysis):
## Model AIC R_squared
## 1 Average predicts Reasonable 181.8793 0.01049767
## 2 Ideal predicts Reasonable 169.4416 0.32121629
## 3 Average + Ideal predict Reasonable 171.4380 0.32129036
##
## Best model for Spain is: Ideal
##
## ========== Analyzing Median Data for: Netherlands ==========
## Participants after attention check filter:
## Average condition: 131
## Ideal condition: 130
## Reasonable condition: 114
##
## Regression Results for Netherlands (Median Analysis):
## Model AIC R_squared
## 1 Average predicts Reasonable 183.4221 0.006706274
## 2 Ideal predicts Reasonable 172.2985 0.290934079
## 3 Average + Ideal predict Reasonable 173.1044 0.316133837
##
## Best model for Netherlands is: Ideal
# Extract AIC values and best models for all countries (median analysis)
country_aic_summary_median <- data.frame(
Country = character(),
AIC_Average = numeric(),
AIC_Ideal = numeric(),
AIC_Hybrid = numeric(),
R2_Average = numeric(),
R2_Ideal = numeric(),
R2_Hybrid = numeric(),
Best_Model = character(),
stringsAsFactors = FALSE
)
for(country in countries) {
if(!is.null(country_results_median[[country]])) {
country_aic_summary_median <- rbind(country_aic_summary_median, data.frame(
Country = country,
AIC_Average = country_results_median[[country]]$aic_values["Model1"],
AIC_Ideal = country_results_median[[country]]$aic_values["Model2"],
AIC_Hybrid = country_results_median[[country]]$aic_values["Model3"],
R2_Average = country_results_median[[country]]$regression_summary$R_squared[1],
R2_Ideal = country_results_median[[country]]$regression_summary$R_squared[2],
R2_Hybrid = country_results_median[[country]]$regression_summary$R_squared[3],
Best_Model = country_results_median[[country]]$best_model,
stringsAsFactors = FALSE
))
}
}
# Display the summary table
kable(country_aic_summary_median,
caption = "AIC Values and Best Models by Country (Median Analysis)") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F) %>%
column_spec(8, background = ifelse(country_aic_summary_median$Best_Model == "Hybrid", "#e6f7ff",
ifelse(country_aic_summary_median$Best_Model == "Average", "#e6ffe6", "#ffe6e6")))
| Country | AIC_Average | AIC_Ideal | AIC_Hybrid | R2_Average | R2_Ideal | R2_Hybrid | Best_Model | |
|---|---|---|---|---|---|---|---|---|
| Model1 | USA | 182.8055 | 171.6330 | 173.0812 | 0.0032020 | 0.2894887 | 0.3012696 | Ideal |
| Model11 | Colombia | 182.6409 | 179.8702 | 177.8151 | 0.0012566 | 0.0816866 | 0.1878741 | Hybrid |
| Model12 | Brazil | 182.9969 | 177.8130 | 178.2859 | 0.0045847 | 0.1492893 | 0.1877588 | Ideal |
| Model13 | Germany | 183.3067 | 174.6719 | 175.6038 | 0.0005518 | 0.2306543 | 0.2551563 | Ideal |
| Model14 | India | 179.6270 | 179.2726 | 181.1041 | 0.0241233 | 0.0345487 | 0.0394658 | Ideal |
| Model15 | Lithuania | 182.6574 | 178.1545 | 178.0779 | 0.0026273 | 0.1298425 | 0.1829132 | Hybrid |
| Model16 | Italy | 183.3033 | 178.9606 | 180.7836 | 0.0001271 | 0.1234184 | 0.1281056 | Ideal |
| Model17 | Poland | 183.5047 | 179.0232 | 180.5577 | 0.0000815 | 0.1270572 | 0.1392848 | Ideal |
| Model18 | Spain | 181.8793 | 169.4416 | 171.4380 | 0.0104977 | 0.3212163 | 0.3212904 | Ideal |
| Model19 | Netherlands | 183.4221 | 172.2985 | 173.1044 | 0.0067063 | 0.2909341 | 0.3161338 | Ideal |
# Compare mean and median analysis
comparison_mean_median <- data.frame(
Country = character(),
Mean_Best_Model = character(),
Median_Best_Model = character(),
Mean_Hybrid_AIC = numeric(),
Median_Hybrid_AIC = numeric(),
Mean_Hybrid_R2 = numeric(),
Median_Hybrid_R2 = numeric(),
stringsAsFactors = FALSE
)
for(country in countries) {
if(!is.null(country_results_mean[[country]]) && !is.null(country_results_median[[country]])) {
comparison_mean_median <- rbind(comparison_mean_median, data.frame(
Country = country,
Mean_Best_Model = country_results_mean[[country]]$best_model,
Median_Best_Model = country_results_median[[country]]$best_model,
Mean_Hybrid_AIC = country_results_mean[[country]]$aic_values["Model3"],
Median_Hybrid_AIC = country_results_median[[country]]$aic_values["Model3"],
Mean_Hybrid_R2 = country_results_mean[[country]]$regression_summary$R_squared[3],
Median_Hybrid_R2 = country_results_median[[country]]$regression_summary$R_squared[3],
stringsAsFactors = FALSE
))
}
}
# Display the comparison table
kable(comparison_mean_median,
caption = "Comparison of Mean and Median Analyses by Country") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F) %>%
column_spec(2, background = ifelse(comparison_mean_median$Mean_Best_Model == "Hybrid", "#e6f7ff",
ifelse(comparison_mean_median$Mean_Best_Model == "Average", "#e6ffe6", "#ffe6e6"))) %>%
column_spec(3, background = ifelse(comparison_mean_median$Median_Best_Model == "Hybrid", "#e6f7ff",
ifelse(comparison_mean_median$Median_Best_Model == "Average", "#e6ffe6", "#ffe6e6")))
| Country | Mean_Best_Model | Median_Best_Model | Mean_Hybrid_AIC | Median_Hybrid_AIC | Mean_Hybrid_R2 | Median_Hybrid_R2 | |
|---|---|---|---|---|---|---|---|
| Model3 | USA | Hybrid | Ideal | 179.3529 | 173.0812 | 0.1972187 | 0.3012696 |
| Model31 | Colombia | Hybrid | Hybrid | 180.3834 | 177.8151 | 0.1622269 | 0.1878741 |
| Model32 | Brazil | Hybrid | Ideal | 182.4260 | 178.2859 | 0.0881237 | 0.1877588 |
| Model33 | Germany | Ideal | Ideal | 180.0991 | 175.6038 | 0.1613116 | 0.2551563 |
| Model34 | India | Average | Ideal | 183.6439 | 181.1041 | 0.0032863 | 0.0394658 |
| Model35 | Lithuania | Hybrid | Hybrid | 179.5624 | 178.0779 | 0.1671046 | 0.1829132 |
| Model36 | Italy | Hybrid | Ideal | 181.2000 | 180.7836 | 0.1221354 | 0.1281056 |
| Model37 | Poland | Hybrid | Ideal | 179.8445 | 180.5577 | 0.1579659 | 0.1392848 |
| Model38 | Spain | Hybrid | Ideal | 176.0171 | 171.4380 | 0.2402000 | 0.3212904 |
| Model39 | Netherlands | Ideal | Ideal | 183.9397 | 173.1044 | 0.0574648 | 0.3161338 |
# Save results
analysis_7_results <- list(
country_results = country_results_median,
aic_summary = country_aic_summary_median,
comparison_with_means = comparison_mean_median
)
# Create storage for country-specific median intermediacy results
country_intermediacy_results_median <- list()
# Function to analyze median intermediacy for a single country
analyze_country_intermediacy_median <- function(country) {
cat("\n========== Analyzing Median Intermediacy for:", country, "==========\n")
# Use the median data from Analysis 7
# If country data wasn't successfully processed in Analysis 7, return NULL
if(is.null(country_results_median[[country]])) {
cat("No valid data for", country, "\n")
return(NULL)
}
# Extract the median values
country_medians_average <- country_results_median[[country]]$medians$average
country_medians_ideal <- country_results_median[[country]]$medians$ideal
country_medians_reasonable <- country_results_median[[country]]$medians$reasonable
# Step 3: Check if reasonable is on the "ideal side" of average
# Note: For median analysis, we treat equal values as being on the predicted side
ideal_side_vector_median <- mapply(is_ideal_side_median,
country_medians_average,
country_medians_ideal,
country_medians_reasonable)
count_ideal_side_median <- sum(ideal_side_vector_median)
# Conduct binomial test
binomial_result_ideal_median <- binom.test(count_ideal_side_median, length(ideal_side_vector_median),
p = 0.5, alternative = "greater")
# Step 4: Check if reasonable is on the "average side" of ideal
average_side_vector_median <- mapply(is_average_side_median,
country_medians_average,
country_medians_ideal,
country_medians_reasonable)
count_average_side_median <- sum(average_side_vector_median)
# Conduct binomial test
binomial_result_avg_side_median <- binom.test(count_average_side_median, length(average_side_vector_median),
p = 0.5, alternative = "greater")
# Step 5: Check if reasonable is both on the ideal side of average and average side of ideal
both_sides_vector_median <- mapply(is_both_sides_median,
country_medians_average,
country_medians_ideal,
country_medians_reasonable)
count_both_sides_median <- sum(both_sides_vector_median)
# Conduct binomial test with 1/3 as the chance rate
binomial_result_both_sides_median <- binom.test(count_both_sides_median, length(both_sides_vector_median),
p = 1/3, alternative = "two.sided")
# Create a summary table
intermediacy_summary_median <- data.frame(
Test = c("On Ideal Side of Average", "On Average Side of Ideal", "Between Average and Ideal"),
Count = c(count_ideal_side_median, count_average_side_median, count_both_sides_median),
Total = rep(length(ideal_side_vector_median), 3),
Proportion = c(count_ideal_side_median/length(ideal_side_vector_median),
count_average_side_median/length(average_side_vector_median),
count_both_sides_median/length(both_sides_vector_median)),
p_value = c(binomial_result_ideal_median$p.value,
binomial_result_avg_side_median$p.value,
binomial_result_both_sides_median$p.value)
)
# Format proportions and p-values
intermediacy_summary_median$Proportion <- scales::percent(intermediacy_summary_median$Proportion)
intermediacy_summary_median$p_value <- sapply(intermediacy_summary_median$p_value, format_pvalue)
cat("\nMedian Intermediary Analysis for", country, ":\n")
print(intermediacy_summary_median)
# Return results
return(list(
country = country,
ideal_side = list(
vector = ideal_side_vector_median,
count = count_ideal_side_median,
proportion = count_ideal_side_median/length(ideal_side_vector_median),
binomial_test = binomial_result_ideal_median
),
average_side = list(
vector = average_side_vector_median,
count = count_average_side_median,
proportion = count_average_side_median/length(average_side_vector_median),
binomial_test = binomial_result_avg_side_median
),
both_sides = list(
vector = both_sides_vector_median,
count = count_both_sides_median,
proportion = count_both_sides_median/length(both_sides_vector_median),
binomial_test = binomial_result_both_sides_median
),
summary = intermediacy_summary_median
))
}
# Analyze median intermediacy for each country
for(country in countries) {
country_intermediacy_results_median[[country]] <- analyze_country_intermediacy_median(country)
}
##
## ========== Analyzing Median Intermediacy for: USA ==========
##
## Median Intermediary Analysis for USA :
## Test Count Total Proportion p_value
## 1 On Ideal Side of Average 26 33 78.8% < 0.001 ***
## 2 On Average Side of Ideal 15 33 45.5% 0.757
## 3 Between Average and Ideal 14 33 42.4% 0.272
##
## ========== Analyzing Median Intermediacy for: Colombia ==========
##
## Median Intermediary Analysis for Colombia :
## Test Count Total Proportion p_value
## 1 On Ideal Side of Average 26 33 78.8% < 0.001 ***
## 2 On Average Side of Ideal 11 33 33.3% 0.982
## 3 Between Average and Ideal 13 33 39.4% 0.464
##
## ========== Analyzing Median Intermediacy for: Brazil ==========
##
## Median Intermediary Analysis for Brazil :
## Test Count Total Proportion p_value
## 1 On Ideal Side of Average 26 33 79% < 0.001 ***
## 2 On Average Side of Ideal 8 33 24% 0.999
## 3 Between Average and Ideal 15 33 45% 0.143
##
## ========== Analyzing Median Intermediacy for: Germany ==========
##
## Median Intermediary Analysis for Germany :
## Test Count Total Proportion p_value
## 1 On Ideal Side of Average 25 33 76% 0.002 **
## 2 On Average Side of Ideal 13 33 39% 0.919
## 3 Between Average and Ideal 18 33 55% 0.015 *
##
## ========== Analyzing Median Intermediacy for: India ==========
##
## Median Intermediary Analysis for India :
## Test Count Total Proportion p_value
## 1 On Ideal Side of Average 23 33 70% 0.018 *
## 2 On Average Side of Ideal 18 33 55% 0.364
## 3 Between Average and Ideal 3 33 9% 0.002 **
##
## ========== Analyzing Median Intermediacy for: Lithuania ==========
##
## Median Intermediary Analysis for Lithuania :
## Test Count Total Proportion p_value
## 1 On Ideal Side of Average 27 33 81.8% < 0.001 ***
## 2 On Average Side of Ideal 13 33 39.4% 0.919
## 3 Between Average and Ideal 15 33 45.5% 0.143
##
## ========== Analyzing Median Intermediacy for: Italy ==========
##
## Median Intermediary Analysis for Italy :
## Test Count Total Proportion p_value
## 1 On Ideal Side of Average 24 33 73% 0.007 **
## 2 On Average Side of Ideal 10 33 30% 0.993
## 3 Between Average and Ideal 17 33 52% 0.04 *
##
## ========== Analyzing Median Intermediacy for: Poland ==========
##
## Median Intermediary Analysis for Poland :
## Test Count Total Proportion p_value
## 1 On Ideal Side of Average 23 33 69.7% 0.018 *
## 2 On Average Side of Ideal 14 33 42.4% 0.852
## 3 Between Average and Ideal 15 33 45.5% 0.143
##
## ========== Analyzing Median Intermediacy for: Spain ==========
##
## Median Intermediary Analysis for Spain :
## Test Count Total Proportion p_value
## 1 On Ideal Side of Average 26 33 79% < 0.001 ***
## 2 On Average Side of Ideal 12 33 36% 0.96
## 3 Between Average and Ideal 16 33 48% 0.094
##
## ========== Analyzing Median Intermediacy for: Netherlands ==========
##
## Median Intermediary Analysis for Netherlands :
## Test Count Total Proportion p_value
## 1 On Ideal Side of Average 24 33 72.7% 0.007 **
## 2 On Average Side of Ideal 13 33 39.4% 0.919
## 3 Between Average and Ideal 15 33 45.5% 0.143
# Create summary table for median intermediacy across countries
intermediacy_by_country_median <- data.frame(
Country = character(),
Ideal_Side_Proportion = numeric(),
Ideal_Side_P = numeric(),
Average_Side_Proportion = numeric(),
Average_Side_P = numeric(),
Between_Proportion = numeric(),
Between_P = numeric(),
stringsAsFactors = FALSE
)
for(country in countries) {
if(!is.null(country_intermediacy_results_median[[country]])) {
intermediacy_by_country_median <- rbind(intermediacy_by_country_median, data.frame(
Country = country,
Ideal_Side_Proportion = country_intermediacy_results_median[[country]]$ideal_side$proportion,
Ideal_Side_P = country_intermediacy_results_median[[country]]$ideal_side$binomial_test$p.value,
Average_Side_Proportion = country_intermediacy_results_median[[country]]$average_side$proportion,
Average_Side_P = country_intermediacy_results_median[[country]]$average_side$binomial_test$p.value,
Between_Proportion = country_intermediacy_results_median[[country]]$both_sides$proportion,
Between_P = country_intermediacy_results_median[[country]]$both_sides$binomial_test$p.value,
stringsAsFactors = FALSE
))
}
}
# Format the proportions as percentages
intermediacy_by_country_median$Ideal_Side_Proportion <- scales::percent(intermediacy_by_country_median$Ideal_Side_Proportion)
intermediacy_by_country_median$Average_Side_Proportion <- scales::percent(intermediacy_by_country_median$Average_Side_Proportion)
intermediacy_by_country_median$Between_Proportion <- scales::percent(intermediacy_by_country_median$Between_Proportion)
# Add significance indicators
intermediacy_by_country_median$Ideal_Side_Sig <- ifelse(intermediacy_by_country_median$Ideal_Side_P < 0.05, "*", "")
intermediacy_by_country_median$Average_Side_Sig <- ifelse(intermediacy_by_country_median$Average_Side_P < 0.05, "*", "")
intermediacy_by_country_median$Between_Sig <- ifelse(intermediacy_by_country_median$Between_P < 0.05, "*", "")
# Create display table
display_intermediacy_median <- intermediacy_by_country_median %>%
select(Country,
Ideal_Side = Ideal_Side_Proportion,
Ideal_Sig = Ideal_Side_Sig,
Average_Side = Average_Side_Proportion,
Average_Sig = Average_Side_Sig,
Between = Between_Proportion,
Between_Sig = Between_Sig)
# Display the table
kable(display_intermediacy_median,
caption = "Intermediacy Analysis by Country (Median Values)") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F) %>%
add_header_above(c(" " = 1, "Ideal Side of Average" = 2, "Average Side of Ideal" = 2, "Between Average and Ideal" = 2))
| Country | Ideal_Side | Ideal_Sig | Average_Side | Average_Sig | Between | Between_Sig |
|---|---|---|---|---|---|---|
| USA | 78.8% |
|
45.5% | 42.4% | ||
| Colombia | 78.8% |
|
33.3% | 39.4% | ||
| Brazil | 78.8% |
|
24.2% | 45.5% | ||
| Germany | 75.8% |
|
39.4% | 54.5% |
|
|
| India | 69.7% |
|
54.5% | 9.1% |
|
|
| Lithuania | 81.8% |
|
39.4% | 45.5% | ||
| Italy | 72.7% |
|
30.3% | 51.5% |
|
|
| Poland | 69.7% |
|
42.4% | 45.5% | ||
| Spain | 78.8% |
|
36.4% | 48.5% | ||
| Netherlands | 72.7% |
|
39.4% | 45.5% |
# Compare mean and median intermediacy results
comparison_intermediacy_mean_median <- data.frame(
Country = character(),
Mean_Ideal_Side = character(),
Median_Ideal_Side = character(),
Mean_Average_Side = character(),
Median_Average_Side = character(),
Mean_Between = character(),
Median_Between = character(),
stringsAsFactors = FALSE
)
# Create a visualization comparing "Between" proportions for mean vs median analyses
between_data_long <- data.frame(
Country = rep(intermediacy_by_country$Country, 2),
Analysis_Type = c(rep("Mean", nrow(intermediacy_by_country)), rep("Median", nrow(intermediacy_by_country_median))),
Between_Proportion = c(as.numeric(gsub("%", "", intermediacy_by_country$Between_Proportion))/100,
as.numeric(gsub("%", "", intermediacy_by_country_median$Between_Proportion))/100),
Significant = c(intermediacy_by_country$Between_P < 0.05,
intermediacy_by_country_median$Between_P < 0.05)
)
# Plot the comparison
between_comparison_plot <- ggplot(between_data_long,
aes(x = reorder(Country, -Between_Proportion),
y = Between_Proportion,
fill = Analysis_Type)) +
geom_bar(stat = "identity", position = position_dodge()) +
geom_text(aes(label = ifelse(Significant, "*", "")),
position = position_dodge(width = 0.9), vjust = -0.5) +
geom_hline(yintercept = 1/3, linetype = "dashed", color = "red") +
labs(title = "Proportion of Questions where Reasonable is Between Average and Ideal",
subtitle = "Comparison of Mean vs Median Analysis by Country, Red line = Chance level (1/3), * = p < 0.05",
x = "Country",
y = "Proportion") +
scale_y_continuous(labels = scales::percent) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
print(between_comparison_plot)
ggsave("between_proportions_comparison.png", between_comparison_plot, width = 12, height = 6, dpi = 300)
# Save the results
analysis_8_results <- list(
country_results = country_intermediacy_results_median,
summary = intermediacy_by_country_median,
comparison_with_means = comparison_intermediacy_mean_median
)